{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt \n",
    "%matplotlib inline "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "# low_memory=False makes pandas parse the file in a single pass and infer one\n",
    "# dtype per column; the chunked default raised a DtypeWarning (mixed types in\n",
    "# columns 12 and 18) on this file.\n",
    "# NOTE(review): the path below is absolute and machine-specific -- adjust it\n",
    "# before re-running the notebook on another machine.\n",
    "train=pd.read_csv(r'C:\\Users\\Administrator\\Train.csv',encoding=\"gb18030\",low_memory=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the dimensions and a short preview rather than printing the entire frame.\n",
    "# NOTE(review): an earlier run reported 65535 rows x 26 columns; 65535 data rows\n",
    "# plus one header line is 65536, the row limit of legacy .xls sheets -- confirm\n",
    "# that Train.csv was not truncated by a spreadsheet export.\n",
    "print(train.shape)\n",
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                id  Gender                  City  Monthly_Income        DOB  \\\n",
      "0      ID000026A10    Male              Dehradun           21500   3-Apr-87   \n",
      "1      ID000054C40    Male                Mumbai           42000  12-May-80   \n",
      "2      ID000066O10  Female                Jaipur           10000  19-Sep-89   \n",
      "3      ID000110G00  Female               Chennai           14650  15-Aug-91   \n",
      "4      ID000113J30    Male               Chennai           23400  22-Jul-87   \n",
      "5      ID000133D30  Female               Gurgaon           15000  25-Oct-86   \n",
      "6      ID000156A10  Female             Bengaluru           69000  22-Jul-72   \n",
      "7      ID000162G20  Female                Mumbai           20555  18-Aug-90   \n",
      "8      ID000174S40    Male                 Surat           30000   6-Jun-90   \n",
      "9      ID000175T00    Male                 Delhi           40400  13-May-83   \n",
      "10     ID000178W30    Male             Bengaluru           17000   3-May-89   \n",
      "11     ID000199R40  Female              Tirupati           24800  17-Sep-85   \n",
      "12     ID000235B00  Female             Ahmedabad           10000  13-Nov-95   \n",
      "13     ID000240G00    Male             Faridabad           25600  15-Nov-89   \n",
      "14     ID000247N20  Female                 Delhi           10500   1-Sep-87   \n",
      "15     ID000267H20    Male                Mumbai           30000  22-Nov-86   \n",
      "16     ID000289D40  Female                 Delhi          100000   7-Feb-79   \n",
      "17     ID000291F10    Male         Visakhapatnam           29600  20-Apr-88   \n",
      "18     ID000301P10    Male             Allahabad           38000   4-Mar-72   \n",
      "19     ID000320I00  Female             Bengaluru           15000  29-Jul-87   \n",
      "20     ID000323L30    Male                 Delhi           20000  16-Jun-91   \n",
      "21     ID000339B40  Female             Panchkula           10190  15-Jun-75   \n",
      "22     ID000364A40  Female                Nashik           12680  28-Jun-84   \n",
      "23     ID000389Z40    Male               Gurgaon           30000   2-Aug-84   \n",
      "24     ID000400K00    Male   Gautam Buddha Nagar           20600  12-Jul-83   \n",
      "25     ID000406Q10  Female             Bengaluru           65000  12-Jul-87   \n",
      "26     ID000410U00    Male                  Pune           25300   6-Nov-88   \n",
      "27     ID000417B20    Male                 Delhi           24000   8-Jun-85   \n",
      "28     ID000420E00  Female              Guwahati           10000  16-Jun-91   \n",
      "29     ID000429N40    Male  Kamrup Metropolitian           15500  25-May-79   \n",
      "...            ...     ...                   ...             ...        ...   \n",
      "37687  ID124713R30  Female                 Delhi             922  13-Dec-88   \n",
      "37688  ID124715T00  Female                Mumbai          900000  19-Dec-61   \n",
      "37689  ID124719X40  Female                 Delhi           45000   7-May-90   \n",
      "37690  ID124720Y00    Male             Hyderabad           13000  19-Jun-93   \n",
      "37691  ID124724C40  Female             Hyderabad           19000  21-May-91   \n",
      "37692  ID124725D00  Female            Vijayawada           20500   1-Apr-72   \n",
      "37693  ID124726E10  Female             Faridabad           12000   8-Jul-91   \n",
      "37694  ID124735N00    Male                  Pune           16000   4-Sep-88   \n",
      "37695  ID124741T10  Female                 Delhi           10500  21-Feb-91   \n",
      "37696  ID124744W40  Female                 Delhi           15500  21-Feb-91   \n",
      "37697  ID124745X00  Female             Hyderabad              32  30-Jun-91   \n",
      "37698  ID124748A30    Male         Visakhapatnam           30000   8-Nov-89   \n",
      "37699  ID124755H00  Female                Indore            1250   9-Mar-92   \n",
      "37700  ID124760M00    Male                 Delhi           25000  19-Jul-92   \n",
      "37701  ID124762O20    Male                  Pune           23550  16-Feb-89   \n",
      "37702  ID124764Q40    Male                Mumbai           21000   5-Feb-60   \n",
      "37703  ID124769V40  Female                Mumbai           10000  31-Dec-93   \n",
      "37704  ID124770W00  Female                 Delhi           16000  14-Nov-91   \n",
      "37705  ID124779F40  Female             Ahmedabad           10000  10-Aug-94   \n",
      "37706  ID124781H10    Male             Hyderabad           28200  15-Mar-75   \n",
      "37707  ID124784K40    Male             Hyderabad           12400  19-Sep-79   \n",
      "37708  ID124800A00    Male                 Delhi           40000  28-Jun-85   \n",
      "37709  ID124801B10    Male             Bengaluru          110000   2-Aug-86   \n",
      "37710  ID124805F00    Male                 Surat           20000  22-Jun-92   \n",
      "37711  ID124807H20  Female             Bengaluru           10000   1-May-89   \n",
      "37712  ID124809J40  Female                 Ponda           40000  28-Dec-71   \n",
      "37713  ID124817R20    Male            Chandigarh           20500   9-Feb-84   \n",
      "37714  ID124820U00    Male                Meerut           25000  18-Dec-69   \n",
      "37715  ID124822W20  Female               Kolkata          130000   1-Jan-72   \n",
      "37716  ID124823X30    Male                 Delhi           30000   4-Oct-88   \n",
      "\n",
      "      Lead_Creation_Date  Loan_Amount_Applied  Loan_Tenure_Applied  \\\n",
      "0               5-May-15             100000.0                  3.0   \n",
      "1               1-May-15                  0.0                  0.0   \n",
      "2               1-May-15             300000.0                  2.0   \n",
      "3               1-May-15                  0.0                  0.0   \n",
      "4               1-May-15             100000.0                  1.0   \n",
      "5               1-May-15             100000.0                  0.0   \n",
      "6               1-May-15             200000.0                  5.0   \n",
      "7               1-May-15             100000.0                  2.0   \n",
      "8               1-May-15             100000.0                  1.0   \n",
      "9               1-May-15             300000.0                  4.0   \n",
      "10              1-May-15                  0.0                  0.0   \n",
      "11              1-May-15             500000.0                  4.0   \n",
      "12              1-May-15             200000.0                  3.0   \n",
      "13              1-May-15             200000.0                  5.0   \n",
      "14              1-May-15             150000.0                  5.0   \n",
      "15              1-May-15             300000.0                  3.0   \n",
      "16              1-May-15            1000000.0                  5.0   \n",
      "17              1-May-15                  0.0                  0.0   \n",
      "18              1-May-15                  0.0                  0.0   \n",
      "19              1-May-15                  0.0                  0.0   \n",
      "20              1-May-15             100000.0                  2.0   \n",
      "21              1-May-15             100000.0                  5.0   \n",
      "22              1-May-15             200000.0                  5.0   \n",
      "23              1-May-15                  0.0                  0.0   \n",
      "24              1-May-15             100000.0                  1.0   \n",
      "25              1-May-15             200000.0                  0.0   \n",
      "26              1-May-15              74000.0                  2.0   \n",
      "27              1-May-15             200000.0                  4.0   \n",
      "28              1-May-15             100000.0                  1.0   \n",
      "29              1-May-15             100000.0                  3.0   \n",
      "...                  ...                  ...                  ...   \n",
      "37687          31-Jul-15                  0.0                  0.0   \n",
      "37688          31-Jul-15             100000.0                  2.0   \n",
      "37689          31-Jul-15             100000.0                  1.0   \n",
      "37690          31-Jul-15             200000.0                  3.0   \n",
      "37691          31-Jul-15                  0.0                  0.0   \n",
      "37692          31-Jul-15                  NaN                  NaN   \n",
      "37693          31-Jul-15                  0.0                  0.0   \n",
      "37694          31-Jul-15             100000.0                  3.0   \n",
      "37695          31-Jul-15                  0.0                  0.0   \n",
      "37696          31-Jul-15                  0.0                  0.0   \n",
      "37697          31-Jul-15                  0.0                  0.0   \n",
      "37698          31-Jul-15                  0.0                  0.0   \n",
      "37699          31-Jul-15                  0.0                  0.0   \n",
      "37700          31-Jul-15                  0.0                  0.0   \n",
      "37701          31-Jul-15                  0.0                  0.0   \n",
      "37702          31-Jul-15             500000.0                  5.0   \n",
      "37703          31-Jul-15                  0.0                  0.0   \n",
      "37704          31-Jul-15             100000.0                  3.0   \n",
      "37705          31-Jul-15                  0.0                  0.0   \n",
      "37706          31-Jul-15                  0.0                  0.0   \n",
      "37707          31-Jul-15                  0.0                  0.0   \n",
      "37708          31-Jul-15                  0.0                  0.0   \n",
      "37709          31-Jul-15                  0.0                  0.0   \n",
      "37710          31-Jul-15                  0.0                  0.0   \n",
      "37711          31-Jul-15             100000.0                  2.0   \n",
      "37712          31-Jul-15            1000000.0                  4.0   \n",
      "37713          31-Jul-15                  0.0                  0.0   \n",
      "37714          31-Jul-15                  0.0                  0.0   \n",
      "37715          31-Jul-15             600000.0                  0.0   \n",
      "37716          31-Jul-15             100000.0                  1.0   \n",
      "\n",
      "       Existing_EMI                                      Employer_Name ...   \\\n",
      "0               0.0                                         APTARA INC ...    \n",
      "1               0.0                                           ATUL LTD ...    \n",
      "2               0.0                                  SHAREKHAN PVT LTD ...    \n",
      "3               0.0                      MAERSK GLOBAL SERVICE CENTRES ...    \n",
      "4            5000.0                                             SCHAWK ...    \n",
      "5            4500.0                             SSL LOGISTICS PVT LTD. ...    \n",
      "6           30000.0                    NATIONAL AEROSPACE LABORATORIES ...    \n",
      "7            7497.0           EMERSON PROCESS MANAGEMENT INDIA PVT LTD ...    \n",
      "8               0.0                                         ANKIT GEMS ...    \n",
      "9               0.0                      IHG IT SERVICES INDIA PVT LTD ...    \n",
      "10              0.0            CONCENTRIX DAKSH SERVICES INDIA PVT LTD ...    \n",
      "11              0.0                                         SREEHARI B ...    \n",
      "12           2500.0                                         SELSE JOBS ...    \n",
      "13              0.0                              PAYU PAYMENTS PVT LTD ...    \n",
      "14           2500.0                                       DEEP HYUNDAI ...    \n",
      "15           6000.0                       YASHPAL CONSTRUCTION PVT LTD ...    \n",
      "16          41000.0                                       BROOKE INDIA ...    \n",
      "17              0.0                                     SYNDICATE BANK ...    \n",
      "18              0.0                                 GOVERNMENT SERVICE ...    \n",
      "19              0.0                   PARAMOUNT CORPORATE  NETWORK LTD ...    \n",
      "20              0.0                                   POLICYBAZAAR.COM ...    \n",
      "21              0.0                                 VENUS REMEDIES LTD ...    \n",
      "22           1880.0                                      HDFC BANK LTD ...    \n",
      "23              0.0                 GENPACT GE CAPITAL INTERNATNL SERV ...    \n",
      "24              0.0                 RAMTECH SOFTWARE SOLUTIONS PVT LTD ...    \n",
      "25          10000.0               CADENCE DESIGN SYSTEMS INDIA PVT LTD ...    \n",
      "26              0.0                               INFOSYS TECHNOLOGIES ...    \n",
      "27              0.0                                     ICICI BANK LTD ...    \n",
      "28           2500.0                                            PICK ME ...    \n",
      "29              0.0                          BRAHMUPTRA LOGISTICS PLTD ...    \n",
      "...             ...                                                ... ...    \n",
      "37687           0.0                                                  0 ...    \n",
      "37688       45000.0                               HCL TECHNOLOGIES LTD ...    \n",
      "37689        6200.0                        XAVIENT INFORMATION SYSTEMS ...    \n",
      "37690           0.0  PINKERTON CORPORATE RISK MANAGEMENT INDIA PVT LTD ...    \n",
      "37691           0.0                              CLINASIA LABS PVT LTD ...    \n",
      "37692           NaN                                                NaN ...    \n",
      "37693           0.0                                     ICICI BANK LTD ...    \n",
      "37694           0.0                                     INDIA INFOLINE ...    \n",
      "37695           0.0                             SITI CABLE NETWORK LTD ...    \n",
      "37696           0.0                             SITI CABLE NETWORK LTD ...    \n",
      "37697           0.0                TATA CONSULTANCY SERVICES LTD (TCS) ...    \n",
      "37698           0.0                                            DEFENCE ...    \n",
      "37699           0.0                                                  0 ...    \n",
      "37700           0.0                         CSC INDIA SOFTTECH PVT LTD ...    \n",
      "37701           0.0                         MINDGATE SOLUTIONS PVT LTD ...    \n",
      "37702        3560.0                                    SEA GREEN HOTEL ...    \n",
      "37703           0.0                                                 SR ...    \n",
      "37704           0.0                 QUATRRO MORTGAGE SOLUTIONS PVT LTD ...    \n",
      "37705           0.0                                         ICICI BANK ...    \n",
      "37706           0.0                               TSB SPORTS INDIA LLP ...    \n",
      "37707           0.0                             GETIT INFOSERVICES LTD ...    \n",
      "37708           0.0                         TCS AND ASSOCIATES PVT LTD ...    \n",
      "37709           0.0  INTUIT INDIA TECHNOLOGY TECHNOLOGY SERVICES PV... ...    \n",
      "37710           0.0                                            DIAMOND ...    \n",
      "37711        3500.0                                             FAROOQ ...    \n",
      "37712       10000.0                                      GOVT EMPLOYEE ...    \n",
      "37713           0.0               MAX SUPER SPECIALITY HOSPITAL MOHALI ...    \n",
      "37714           0.0                                            C.I.S.F ...    \n",
      "37715       67000.0                                     TATA STEEL LTD ...    \n",
      "37716        4364.0             NINE A BUSINESS CONNECT PRIVATE LIMTED ...    \n",
      "\n",
      "      Loan_Amount_Submitted Loan_Tenure_Submitted  Interest_Rate  \\\n",
      "0                  100000.0                   3.0          20.00   \n",
      "1                  690000.0                   5.0          24.00   \n",
      "2                       NaN                   NaN            NaN   \n",
      "3                       NaN                   NaN            NaN   \n",
      "4                  100000.0                   2.0            NaN   \n",
      "5                       NaN                   NaN            NaN   \n",
      "6                       NaN                   NaN            NaN   \n",
      "7                       NaN                   NaN            NaN   \n",
      "8                  100000.0                   1.0            NaN   \n",
      "9                  300000.0                   4.0            NaN   \n",
      "10                 390000.0                   5.0          18.50   \n",
      "11                      NaN                   NaN            NaN   \n",
      "12                      NaN                   NaN            NaN   \n",
      "13                 200000.0                   5.0          16.75   \n",
      "14                      NaN                   NaN            NaN   \n",
      "15                 300000.0                   3.0            NaN   \n",
      "16                      NaN                   NaN            NaN   \n",
      "17                 440000.0                   5.0          13.99   \n",
      "18                 610000.0                   5.0          18.00   \n",
      "19                      NaN                   NaN            NaN   \n",
      "20                 100000.0                   2.0            NaN   \n",
      "21                      NaN                   NaN            NaN   \n",
      "22                 130000.0                   5.0          13.00   \n",
      "23                 450000.0                   5.0          31.00   \n",
      "24                 100000.0                   1.0            NaN   \n",
      "25                      NaN                   NaN            NaN   \n",
      "26                  70000.0                   2.0            NaN   \n",
      "27                 200000.0                   4.0            NaN   \n",
      "28                      NaN                   NaN            NaN   \n",
      "29                 100000.0                   3.0          20.00   \n",
      "...                     ...                   ...            ...   \n",
      "37687                   NaN                   NaN            NaN   \n",
      "37688                   NaN                   NaN            NaN   \n",
      "37689                   NaN                   NaN            NaN   \n",
      "37690              180000.0                   3.0          31.50   \n",
      "37691              320000.0                   4.0          18.25   \n",
      "37692                   NaN                   NaN            NaN   \n",
      "37693                   NaN                   NaN            NaN   \n",
      "37694              100000.0                   3.0            NaN   \n",
      "37695                   NaN                   NaN            NaN   \n",
      "37696                   NaN                   NaN            NaN   \n",
      "37697                   NaN                   NaN            NaN   \n",
      "37698              550000.0                   4.0            NaN   \n",
      "37699                   NaN                   NaN            NaN   \n",
      "37700                   NaN                   NaN            NaN   \n",
      "37701              400000.0                   4.0          18.15   \n",
      "37702              490000.0                   5.0            NaN   \n",
      "37703                   NaN                   NaN            NaN   \n",
      "37704                   NaN                   NaN            NaN   \n",
      "37705                   NaN                   NaN            NaN   \n",
      "37706              530000.0                   4.0          27.00   \n",
      "37707              200000.0                   4.0          31.50   \n",
      "37708              720000.0                   4.0            NaN   \n",
      "37709             1500000.0                   4.0            NaN   \n",
      "37710              320000.0                   4.0            NaN   \n",
      "37711                   NaN                   NaN            NaN   \n",
      "37712                   NaN                   NaN            NaN   \n",
      "37713              330000.0                   4.0          31.50   \n",
      "37714              390000.0                   4.0            NaN   \n",
      "37715                   NaN                   NaN            NaN   \n",
      "37716              100000.0                   1.0            NaN   \n",
      "\n",
      "      Processing_Fee  EMI_Loan_Submitted  Filled_Form  Device_Type  Var2  \\\n",
      "0             1000.0             2649.39            N  Web-browser     B   \n",
      "1            13800.0            19849.90            Y       Mobile     C   \n",
      "2                NaN                 NaN            N  Web-browser     B   \n",
      "3                NaN                 NaN            N       Mobile     C   \n",
      "4                NaN                 NaN            N  Web-browser     B   \n",
      "5                NaN                 NaN            N  Web-browser     B   \n",
      "6                NaN                 NaN            N  Web-browser     B   \n",
      "7                NaN                 NaN            N  Web-browser     B   \n",
      "8                NaN                 NaN            N  Web-browser     B   \n",
      "9                NaN                 NaN            N  Web-browser     B   \n",
      "10            3900.0            10009.82            Y       Mobile     C   \n",
      "11               NaN                 NaN            N  Web-browser     B   \n",
      "12               NaN                 NaN            N  Web-browser     B   \n",
      "13               NaN             4943.67            Y  Web-browser     B   \n",
      "14               NaN                 NaN            N  Web-browser     B   \n",
      "15               NaN                 NaN            N  Web-browser     B   \n",
      "16               NaN                 NaN            N  Web-browser     B   \n",
      "17            8800.0            10235.75            Y       Mobile     C   \n",
      "18           13725.0            15489.99            Y       Mobile     C   \n",
      "19               NaN                 NaN            N       Mobile     C   \n",
      "20               NaN                 NaN            N  Web-browser     B   \n",
      "21               NaN                 NaN            N  Web-browser     E   \n",
      "22             999.0             2957.90            N  Web-browser     B   \n",
      "23           11250.0            14836.64            Y       Mobile     C   \n",
      "24               NaN                 NaN            N  Web-browser     B   \n",
      "25               NaN                 NaN            N  Web-browser     B   \n",
      "26               NaN                 NaN            N  Web-browser     B   \n",
      "27               NaN                 NaN            N  Web-browser     B   \n",
      "28               NaN                 NaN            N  Web-browser     B   \n",
      "29            1000.0             3716.36            N  Web-browser     B   \n",
      "...              ...                 ...          ...          ...   ...   \n",
      "37687            NaN                 NaN            N       Mobile     G   \n",
      "37688            NaN                 NaN            N  Web-browser     G   \n",
      "37689            NaN                 NaN            N  Web-browser     G   \n",
      "37690         3600.0             7789.92            N  Web-browser     G   \n",
      "37691         3200.0             9441.85            Y       Mobile     G   \n",
      "37692            NaN                 NaN            N  Web-browser     G   \n",
      "37693            NaN                 NaN            N       Mobile     G   \n",
      "37694            NaN                 NaN            N  Web-browser     G   \n",
      "37695            NaN                 NaN            N       Mobile     G   \n",
      "37696            NaN                 NaN            N       Mobile     G   \n",
      "37697            NaN                 NaN            N       Mobile     G   \n",
      "37698            NaN                 NaN            N       Mobile     G   \n",
      "37699            NaN                 NaN            N       Mobile     G   \n",
      "37700            NaN                 NaN            N  Web-browser     G   \n",
      "37701         4000.0            11781.38            Y       Mobile     G   \n",
      "37702            NaN                 NaN            N  Web-browser     G   \n",
      "37703            NaN                 NaN            N       Mobile     G   \n",
      "37704            NaN                 NaN            N  Web-browser     G   \n",
      "37705            NaN                 NaN            N       Mobile     G   \n",
      "37706        10600.0            18169.63            Y       Mobile     G   \n",
      "37707         4000.0             7376.73            Y       Mobile     G   \n",
      "37708            NaN                 NaN            N       Mobile     G   \n",
      "37709            NaN                 NaN            N       Mobile     G   \n",
      "37710            NaN                 NaN            N       Mobile     G   \n",
      "37711            NaN                 NaN            N  Web-browser     G   \n",
      "37712            NaN                 NaN            N  Web-browser     G   \n",
      "37713         6600.0            12171.60            Y       Mobile     G   \n",
      "37714            NaN                 NaN            N       Mobile     G   \n",
      "37715            NaN                 NaN            N  Web-browser     G   \n",
      "37716            NaN                 NaN            N  Web-browser     G   \n",
      "\n",
      "       Source Var4  \n",
      "0        S122    3  \n",
      "1        S133    5  \n",
      "2        S133    1  \n",
      "3        S133    1  \n",
      "4        S143    1  \n",
      "5        S133    1  \n",
      "6        S134    1  \n",
      "7        S133    1  \n",
      "8        S156    2  \n",
      "9        S133    3  \n",
      "10       S133    5  \n",
      "11       S159    3  \n",
      "12       S133    1  \n",
      "13       S133    5  \n",
      "14       S122    1  \n",
      "15       S134    2  \n",
      "16       S133    1  \n",
      "17       S133    5  \n",
      "18       S127    5  \n",
      "19       S133    1  \n",
      "20       S122    2  \n",
      "21       S133    1  \n",
      "22       S151    4  \n",
      "23       S133    5  \n",
      "24       S133    3  \n",
      "25       S133    4  \n",
      "26       S133    2  \n",
      "27       S133    3  \n",
      "28       S133    1  \n",
      "29       S159    4  \n",
      "...       ...  ...  \n",
      "37687    S122    1  \n",
      "37688    S122    3  \n",
      "37689    S122    3  \n",
      "37690    S122    4  \n",
      "37691    S122    5  \n",
      "37692    S122    1  \n",
      "37693    S122    1  \n",
      "37694    S122    3  \n",
      "37695    S122    1  \n",
      "37696    S122    1  \n",
      "37697    S122    1  \n",
      "37698    S122    3  \n",
      "37699    S122    1  \n",
      "37700    S122    7  \n",
      "37701    S122    5  \n",
      "37702    S122    1  \n",
      "37703    S122    1  \n",
      "37704    S122    1  \n",
      "37705    S122    1  \n",
      "37706    S122    5  \n",
      "37707    S122    5  \n",
      "37708    S122    3  \n",
      "37709    S122    3  \n",
      "37710    S122    3  \n",
      "37711    S122    1  \n",
      "37712    S122    3  \n",
      "37713    S122    5  \n",
      "37714    S122    3  \n",
      "37715    S122    1  \n",
      "37716    S122    3  \n",
      "\n",
      "[37717 rows x 24 columns]\n"
     ]
    }
   ],
   "source": [
    "test=pd.read_csv(\"C:/Users/Administrator/Test.csv\",encoding='gb18030')\n",
    "print(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train dataset dimensions: (65535, 26)\n",
      "Test dataset dimensions: (37717, 24)\n"
     ]
    }
   ],
   "source": [
    "print (\"Train dataset dimensions:\",train.shape)\n",
    "print (\"Test dataset dimensions:\",test.shape)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>Gender</th>\n",
       "      <th>City</th>\n",
       "      <th>Monthly_Income</th>\n",
       "      <th>DOB</th>\n",
       "      <th>Lead_Creation_Date</th>\n",
       "      <th>Loan_Amount_Applied</th>\n",
       "      <th>Loan_Tenure_Applied</th>\n",
       "      <th>Existing_EMI</th>\n",
       "      <th>Employer_Name</th>\n",
       "      <th>...</th>\n",
       "      <th>Interest_Rate</th>\n",
       "      <th>Processing_Fee</th>\n",
       "      <th>EMI_Loan_Submitted</th>\n",
       "      <th>Filled_Form</th>\n",
       "      <th>Device_Type</th>\n",
       "      <th>Var2</th>\n",
       "      <th>Source</th>\n",
       "      <th>Var4</th>\n",
       "      <th>LoggedIn</th>\n",
       "      <th>Disbursed</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>65530</th>\n",
       "      <td>ID094040Y00</td>\n",
       "      <td>Male</td>\n",
       "      <td>Bengaluru</td>\n",
       "      <td>16500</td>\n",
       "      <td>11-Jan-87</td>\n",
       "      <td>12-Jul-15</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>AEGIS LTD</td>\n",
       "      <td>...</td>\n",
       "      <td>18.25</td>\n",
       "      <td>2600.0</td>\n",
       "      <td>7671.51</td>\n",
       "      <td>Y</td>\n",
       "      <td>Mobile</td>\n",
       "      <td>G</td>\n",
       "      <td>S122</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>65531</th>\n",
       "      <td>ID094041Z10</td>\n",
       "      <td>Female</td>\n",
       "      <td>Kolkata</td>\n",
       "      <td>23800</td>\n",
       "      <td>18-Aug-72</td>\n",
       "      <td>12-Jul-15</td>\n",
       "      <td>500000.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2609.0</td>\n",
       "      <td>ANTRAWEB TECHNOLOGIES PVT LTD</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>N</td>\n",
       "      <td>Web-browser</td>\n",
       "      <td>G</td>\n",
       "      <td>S122</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>65532</th>\n",
       "      <td>ID094042A20</td>\n",
       "      <td>Male</td>\n",
       "      <td>Delhi</td>\n",
       "      <td>45000</td>\n",
       "      <td>12-Nov-85</td>\n",
       "      <td>12-Jul-15</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>SEA BAUFORMAT INDIA PVT LTD</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>N</td>\n",
       "      <td>Web-browser</td>\n",
       "      <td>G</td>\n",
       "      <td>S122</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>65533</th>\n",
       "      <td>ID094043B30</td>\n",
       "      <td>Male</td>\n",
       "      <td>Hyderabad</td>\n",
       "      <td>14200</td>\n",
       "      <td>31-Oct-81</td>\n",
       "      <td>12-Jul-15</td>\n",
       "      <td>100000.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>4444.0</td>\n",
       "      <td>YUGANDHAR TEKUMUDI</td>\n",
       "      <td>...</td>\n",
       "      <td>31.50</td>\n",
       "      <td>1800.0</td>\n",
       "      <td>3894.96</td>\n",
       "      <td>N</td>\n",
       "      <td>Web-browser</td>\n",
       "      <td>G</td>\n",
       "      <td>S122</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>65534</th>\n",
       "      <td>ID094044C40</td>\n",
       "      <td>Female</td>\n",
       "      <td>Tirunelveli</td>\n",
       "      <td>64000</td>\n",
       "      <td>19-Dec-85</td>\n",
       "      <td>12-Jul-15</td>\n",
       "      <td>500000.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>MECON LTD</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>N</td>\n",
       "      <td>Web-browser</td>\n",
       "      <td>G</td>\n",
       "      <td>S122</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 26 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                id  Gender         City  Monthly_Income        DOB  \\\n",
       "65530  ID094040Y00    Male    Bengaluru           16500  11-Jan-87   \n",
       "65531  ID094041Z10  Female      Kolkata           23800  18-Aug-72   \n",
       "65532  ID094042A20    Male        Delhi           45000  12-Nov-85   \n",
       "65533  ID094043B30    Male    Hyderabad           14200  31-Oct-81   \n",
       "65534  ID094044C40  Female  Tirunelveli           64000  19-Dec-85   \n",
       "\n",
       "      Lead_Creation_Date  Loan_Amount_Applied  Loan_Tenure_Applied  \\\n",
       "65530          12-Jul-15                  0.0                  0.0   \n",
       "65531          12-Jul-15             500000.0                  5.0   \n",
       "65532          12-Jul-15            1000000.0                  5.0   \n",
       "65533          12-Jul-15             100000.0                  3.0   \n",
       "65534          12-Jul-15             500000.0                  5.0   \n",
       "\n",
       "       Existing_EMI                  Employer_Name    ...    Interest_Rate  \\\n",
       "65530           0.0                      AEGIS LTD    ...            18.25   \n",
       "65531        2609.0  ANTRAWEB TECHNOLOGIES PVT LTD    ...              NaN   \n",
       "65532           0.0    SEA BAUFORMAT INDIA PVT LTD    ...              NaN   \n",
       "65533        4444.0             YUGANDHAR TEKUMUDI    ...            31.50   \n",
       "65534           0.0                      MECON LTD    ...              NaN   \n",
       "\n",
       "      Processing_Fee EMI_Loan_Submitted Filled_Form  Device_Type  Var2  \\\n",
       "65530         2600.0            7671.51           Y       Mobile     G   \n",
       "65531            NaN                NaN           N  Web-browser     G   \n",
       "65532            NaN                NaN           N  Web-browser     G   \n",
       "65533         1800.0            3894.96           N  Web-browser     G   \n",
       "65534            NaN                NaN           N  Web-browser     G   \n",
       "\n",
       "       Source  Var4 LoggedIn Disbursed  \n",
       "65530    S122     5        0       0.0  \n",
       "65531    S122     1        0       0.0  \n",
       "65532    S122     3        0       0.0  \n",
       "65533    S122     4        0       0.0  \n",
       "65534    S122     3        0       0.0  \n",
       "\n",
       "[5 rows x 26 columns]"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head(5)\n",
    "train.tail(5)##后5行"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<bound method DataFrame.info of                 id  Gender           City  Monthly_Income        DOB  \\\n",
       "0      ID000002C20  Female          Delhi           20000  23-May-78   \n",
       "1      ID000004E40    Male         Mumbai           35000   7-Oct-85   \n",
       "2      ID000007H20    Male      Panchkula           22500  10-Oct-81   \n",
       "3      ID000008I30    Male        Saharsa           35000  30-Nov-87   \n",
       "4      ID000009J40    Male      Bengaluru          100000  17-Feb-84   \n",
       "5      ID000010K00    Male      Bengaluru           45000  21-Apr-82   \n",
       "6      ID000011L10  Female     Sindhudurg           70000  23-Oct-87   \n",
       "7      ID000012M20    Male      Bengaluru           20000  25-Jul-75   \n",
       "8      ID000013N30    Male          Kochi           75000  26-Jan-72   \n",
       "9      ID000014O40  Female         Mumbai           30000  12-Sep-89   \n",
       "10     ID000016Q10    Male         Mumbai           25000   1-Jan-76   \n",
       "11     ID000018S30  Female          Surat           25000  13-Oct-89   \n",
       "12     ID000019T40  Female           Pune           24000  22-May-90   \n",
       "13     ID000021V10    Male    Bhubaneswar           27000  24-Jun-82   \n",
       "14     ID000022W20  Female         Howrah           28000   9-Feb-89   \n",
       "15     ID000023X30    Male        Chennai           42000   8-May-82   \n",
       "16     ID000024Y40    Male       Ludhiana           28994  11-Oct-85   \n",
       "17     ID000025Z00  Female          Delhi           20000   6-Jan-90   \n",
       "18     ID000027B20  Female      Bengaluru           33000  14-Jul-76   \n",
       "19     ID000028C30    Male      Panchkula           31500  29-Aug-82   \n",
       "20     ID000029D40    Male        Lucknow           60000  14-Jul-85   \n",
       "21     ID000031F10  Female      Bengaluru           16000   1-Feb-83   \n",
       "22     ID000032G20  Female           Pune           12000  25-Jan-87   \n",
       "23     ID000033H30    Male      Bardhaman           30000  10-Feb-73   \n",
       "24     ID000034I40    Male         Indore           45000  12-Dec-75   \n",
       "25     ID000035J00  Female      Hyderabad           45000  11-Jan-81   \n",
       "26     ID000037L20    Male      Bengaluru           22843   8-Jun-91   \n",
       "27     ID000040O00  Female          Delhi            2900  22-Jul-82   \n",
       "28     ID000041P10  Female        Udaipur            8500  20-May-94   \n",
       "29     ID000043R30    Male         Mumbai          200000   5-Feb-85   \n",
       "...            ...     ...            ...             ...        ...   \n",
       "65505  ID094003N30    Male  Visakhapatnam          150000   3-Aug-91   \n",
       "65506  ID094004O40    Male  Visakhapatnam           80000  18-Apr-88   \n",
       "65507  ID094005P00    Male          Thane           60000  26-Sep-79   \n",
       "65508  ID094006Q10    Male     Chandigarh           12500  13-Oct-88   \n",
       "65509  ID094007R20    Male      Bengaluru           25000  16-Mar-87   \n",
       "65510  ID094009T40    Male          Surat           13009  29-May-80   \n",
       "65511  ID094011V10  Female         Mumbai           13000  25-Jun-79   \n",
       "65512  ID094012W20  Female         Mumbai           23000  29-Feb-88   \n",
       "65513  ID094013X30    Male        Chennai           23000  10-Jan-77   \n",
       "65514  ID094014Y40  Female         Mumbai           20000  14-Aug-85   \n",
       "65515  ID094016A10    Male     Coimbatore           15200   7-Mar-83   \n",
       "65516  ID094018C30  Female      Bengaluru           35000  17-Oct-86   \n",
       "65517  ID094020E00  Female     Vijayawada            8000  29-Jul-89   \n",
       "65518  ID094021F10    Male        Hooghly           30000   4-Oct-89   \n",
       "65519  ID094024I40  Female         Bhopal            7000  26-Oct-91   \n",
       "65520  ID094025J00    Male           Agra           13000  19-Aug-92   \n",
       "65521  ID094027L20    Male          Delhi           60000   9-Feb-84   \n",
       "65522  ID094028M30    Male          Delhi           23800  12-Feb-90   \n",
       "65523  ID094029N40    Male          Noida           98000   2-Jan-91   \n",
       "65524  ID094031P10  Female      Ghaziabad           15100   2-May-88   \n",
       "65525  ID094032Q20    Male      Faridabad           38000  24-Apr-70   \n",
       "65526  ID094033R30  Female         Jaipur           10000  10-Jul-84   \n",
       "65527  ID094035T00    Male      Bengaluru           15800  11-Jan-90   \n",
       "65528  ID094036U10    Male          Thane           18000  11-Nov-79   \n",
       "65529  ID094038W30  Female           Pune           16000  20-Sep-90   \n",
       "65530  ID094040Y00    Male      Bengaluru           16500  11-Jan-87   \n",
       "65531  ID094041Z10  Female        Kolkata           23800  18-Aug-72   \n",
       "65532  ID094042A20    Male          Delhi           45000  12-Nov-85   \n",
       "65533  ID094043B30    Male      Hyderabad           14200  31-Oct-81   \n",
       "65534  ID094044C40  Female    Tirunelveli           64000  19-Dec-85   \n",
       "\n",
       "      Lead_Creation_Date  Loan_Amount_Applied  Loan_Tenure_Applied  \\\n",
       "0              15-May-15             300000.0                  5.0   \n",
       "1               4-May-15             200000.0                  2.0   \n",
       "2              19-May-15             600000.0                  4.0   \n",
       "3               9-May-15            1000000.0                  5.0   \n",
       "4              20-May-15             500000.0                  2.0   \n",
       "5              20-May-15             300000.0                  5.0   \n",
       "6               1-May-15                  6.0                  5.0   \n",
       "7              20-May-15             200000.0                  5.0   \n",
       "8               2-May-15                  0.0                  0.0   \n",
       "9               3-May-15             300000.0                  3.0   \n",
       "10              2-May-15            1000000.0                  5.0   \n",
       "11              2-May-15             140000.0                  4.0   \n",
       "12              2-May-15             500000.0                  4.0   \n",
       "13              9-May-15             200000.0                  5.0   \n",
       "14             13-May-15             100000.0                  1.0   \n",
       "15              5-May-15             500000.0                  3.0   \n",
       "16              8-May-15             300000.0                  5.0   \n",
       "17              1-May-15             100000.0                  5.0   \n",
       "18             24-May-15             500000.0                  5.0   \n",
       "19              1-May-15             500000.0                  5.0   \n",
       "20              1-May-15                  0.0                  0.0   \n",
       "21              1-May-15                  0.0                  0.0   \n",
       "22              1-May-15                  0.0                  0.0   \n",
       "23              1-May-15            1000000.0                  5.0   \n",
       "24              1-May-15                  0.0                  0.0   \n",
       "25              1-May-15             300000.0                  1.0   \n",
       "26              1-May-15             300000.0                  2.0   \n",
       "27              1-May-15                  0.0                  0.0   \n",
       "28              1-May-15             100000.0                  2.0   \n",
       "29              1-May-15            1000000.0                  0.0   \n",
       "...                  ...                  ...                  ...   \n",
       "65505          12-Jul-15             100000.0                  3.0   \n",
       "65506          12-Jul-15            1000000.0                  5.0   \n",
       "65507          12-Jul-15                  0.0                  0.0   \n",
       "65508          12-Jul-15                  0.0                  0.0   \n",
       "65509          12-Jul-15                  0.0                  0.0   \n",
       "65510          12-Jul-15                  0.0                  0.0   \n",
       "65511          12-Jul-15                  0.0                  0.0   \n",
       "65512          12-Jul-15              60000.0                  3.0   \n",
       "65513          12-Jul-15                  0.0                  0.0   \n",
       "65514          12-Jul-15              50000.0                  1.0   \n",
       "65515          12-Jul-15             200000.0                  0.0   \n",
       "65516          12-Jul-15              50000.0                  1.0   \n",
       "65517          12-Jul-15             100000.0                  0.0   \n",
       "65518          12-Jul-15             300000.0                  3.0   \n",
       "65519          12-Jul-15                  0.0                  0.0   \n",
       "65520          12-Jul-15                  0.0                  0.0   \n",
       "65521          12-Jul-15             500000.0                  5.0   \n",
       "65522          12-Jul-15                  0.0                  0.0   \n",
       "65523          12-Jul-15                  0.0                  0.0   \n",
       "65524          12-Jul-15                  0.0                  0.0   \n",
       "65525          12-Jul-15                  0.0                  0.0   \n",
       "65526          12-Jul-15                  0.0                  0.0   \n",
       "65527          12-Jul-15                  0.0                  0.0   \n",
       "65528          12-Jul-15                  0.0                  0.0   \n",
       "65529          12-Jul-15                  0.0                  0.0   \n",
       "65530          12-Jul-15                  0.0                  0.0   \n",
       "65531          12-Jul-15             500000.0                  5.0   \n",
       "65532          12-Jul-15            1000000.0                  5.0   \n",
       "65533          12-Jul-15             100000.0                  3.0   \n",
       "65534          12-Jul-15             500000.0                  5.0   \n",
       "\n",
       "       Existing_EMI                                 Employer_Name    ...     \\\n",
       "0               0.0                                       CYBOSOL    ...      \n",
       "1               0.0           TATA CONSULTANCY SERVICES LTD (TCS)    ...      \n",
       "2               0.0                       ALCHEMIST HOSPITALS LTD    ...      \n",
       "3               0.0                              BIHAR GOVERNMENT    ...      \n",
       "4           25000.0                          GLOBAL EDGE SOFTWARE    ...      \n",
       "5           15000.0  COGNIZANT TECHNOLOGY SOLUTIONS INDIA PVT LTD    ...      \n",
       "6               0.0                          CARNIVAL CRUISE LINE    ...      \n",
       "7            2597.0               GOLDEN TULIP FLORITECH PVT. LTD    ...      \n",
       "8               0.0                                  SIIS PVT LTD    ...      \n",
       "9               0.0                                SOUNDCLOUD.COM    ...      \n",
       "10              0.0                                 KRISHNA KUMAR    ...      \n",
       "11              0.0                        S D JAIN MODERN SCHOOL    ...      \n",
       "12              0.0         K.E.M. HOSPITAL RESEARCH CENTRE, PUNE    ...      \n",
       "13           4600.0                  GI STAFFING SERVICES PVT LTD    ...      \n",
       "14           1200.0                        MCX STOCK EXCHANGE LTD    ...      \n",
       "15              0.0                            SMEC INDIA PVT LTD    ...      \n",
       "16           2550.0                            UNIPARTS INDIA LTD    ...      \n",
       "17              0.0                             INTEC CAPITAL LTD    ...      \n",
       "18           7000.0                                   N RAVIKUMAR    ...      \n",
       "19          10000.0               S P SINGLA CONSTRUCTION PVT LTD    ...      \n",
       "20              0.0                    TCS AND ASSOCIATES PVT LTD    ...      \n",
       "21              0.0                       RELIANCE RETAIL LIMITED    ...      \n",
       "22              0.0                     TERNT HYPERMARKET LIMITED    ...      \n",
       "23           5000.0                                 MD.IDRIS KHAN    ...      \n",
       "24              0.0                                 DILIP SOLANKI    ...      \n",
       "25              0.0             CIGNITI SOFTWARE SERVICES PVT LTD    ...      \n",
       "26              0.0            SYNERGY BUSINESS SOLUTIONS PVT LTD    ...      \n",
       "27              0.0  INVENTIV INTERNATIONAL PHARMA SERVICES P LTD    ...      \n",
       "28              0.0                                      ARC GATE    ...      \n",
       "29              0.0                     APT BUSINESS SERVICES LLP    ...      \n",
       "...             ...                                           ...    ...      \n",
       "65505           0.0             HOSPIRA HEALTH CARE INDIA PVT LTD    ...      \n",
       "65506           0.0                                ICICI BANK LTD    ...      \n",
       "65507           0.0                             HOUSTON HOME LOAN    ...      \n",
       "65508           0.0                 SHRIRAM FORTUNE SOLUTIONS LTD    ...      \n",
       "65509           0.0                    ACCENTURE SERVICES PVT LTD    ...      \n",
       "65510           0.0                     VINOD MEDICAL SYSTEMS PVT    ...      \n",
       "65511           0.0                        ADLABS IMAGICA KHOPOLI    ...      \n",
       "65512        8500.0                 ECLINICAL WORKS INDIA PVT LTD    ...      \n",
       "65513           0.0          ADITYA AUTO PRODUCTS AND ENGINEERING    ...      \n",
       "65514        2000.0                                        SATISH    ...      \n",
       "65515        2900.0                              V TORK CONTROLS     ...      \n",
       "65516        7100.0                    TCS AND ASSOCIATES PVT LTD    ...      \n",
       "65517        2500.0                                     JAGADEESH    ...      \n",
       "65518           0.0                                          ARMY    ...      \n",
       "65519           0.0                                   ROYAL WATCH    ...      \n",
       "65520           0.0                                      GENPACT     ...      \n",
       "65521           0.0     DELL INTERNATIONAL SERVICES INDIA PVT LTD    ...      \n",
       "65522           0.0      EMED LIFE INSURANCE BROKING SERVICES LTD    ...      \n",
       "65523           0.0                    IMPRESSIONS SERVICES P LTD    ...      \n",
       "65524           0.0                   SUNSTAR PRECISION FORGE LTD    ...      \n",
       "65525           0.0         MANPOWER GROUP SERVICES INDIA PVT LTD    ...      \n",
       "65526           0.0                                           OSF    ...      \n",
       "65527           0.0                 KADENCE INTERNATIONAL PVT LTD    ...      \n",
       "65528           0.0                         WATSON PHARMA PVT LTD    ...      \n",
       "65529           0.0                    HDFC LIFE INSURANCE CO LTD    ...      \n",
       "65530           0.0                                     AEGIS LTD    ...      \n",
       "65531        2609.0                 ANTRAWEB TECHNOLOGIES PVT LTD    ...      \n",
       "65532           0.0                   SEA BAUFORMAT INDIA PVT LTD    ...      \n",
       "65533        4444.0                            YUGANDHAR TEKUMUDI    ...      \n",
       "65534           0.0                                     MECON LTD    ...      \n",
       "\n",
       "      Interest_Rate Processing_Fee EMI_Loan_Submitted Filled_Form  \\\n",
       "0               NaN            NaN                NaN           N   \n",
       "1             13.25            NaN             6762.9           N   \n",
       "2               NaN            NaN                NaN           N   \n",
       "3               NaN            NaN                NaN           N   \n",
       "4               NaN            NaN                NaN           N   \n",
       "5             13.99         1500.0            6978.92           N   \n",
       "6               NaN            NaN                NaN           N   \n",
       "7               NaN            NaN                NaN           N   \n",
       "8             14.85        26000.0            30824.7           Y   \n",
       "9             18.25         1500.0            10883.4           N   \n",
       "10            20.00         6600.0              17486           N   \n",
       "11              NaN            NaN                NaN           N   \n",
       "12              NaN            NaN                NaN           N   \n",
       "13            18.00         4500.0            5078.69           N   \n",
       "14              NaN            NaN                NaN           N   \n",
       "15              NaN            NaN                NaN           N   \n",
       "16            15.50         6000.0            7215.96           N   \n",
       "17              NaN            NaN                NaN           N   \n",
       "18              NaN            NaN                NaN           N   \n",
       "19              NaN            NaN                NaN           N   \n",
       "20              NaN            NaN                NaN           N   \n",
       "21              NaN            NaN                NaN           N   \n",
       "22              NaN            NaN                NaN           N   \n",
       "23              NaN            NaN                NaN           N   \n",
       "24              NaN            NaN                NaN           N   \n",
       "25              NaN            NaN                NaN           N   \n",
       "26            20.00         2600.0            13232.9           N   \n",
       "27              NaN            NaN                NaN           N   \n",
       "28              NaN            NaN                NaN           N   \n",
       "29              NaN            NaN                NaN           N   \n",
       "...             ...            ...                ...         ...   \n",
       "65505         16.00         1000.0             3515.7           N   \n",
       "65506           NaN            NaN                NaN           N   \n",
       "65507         13.99         6300.0           29311.46           Y   \n",
       "65508         18.25         1900.0             5606.1           Y   \n",
       "65509         13.00         3360.0           11267.55           Y   \n",
       "65510           NaN            NaN                NaN           N   \n",
       "65511           NaN            NaN                NaN           N   \n",
       "65512           NaN            NaN                NaN           N   \n",
       "65513         28.50         7800.0           13704.15           Y   \n",
       "65514           NaN            NaN                NaN           N   \n",
       "65515           NaN            NaN                NaN           N   \n",
       "65516           NaN            NaN                NaN           N   \n",
       "65517           NaN            NaN                NaN           N   \n",
       "65518         19.75         6000.0            11110.9           N   \n",
       "65519           NaN            NaN                NaN           N   \n",
       "65520         37.00         3800.0            7635.84           Y   \n",
       "65521         13.99         2000.0           11631.53           N   \n",
       "65522         16.75         6600.0           11724.38           Y   \n",
       "65523         15.50        24000.0           28863.83           Y   \n",
       "65524           NaN            NaN                NaN           N   \n",
       "65525         13.99         3450.0           18851.81           Y   \n",
       "65526           NaN            NaN                NaN           N   \n",
       "65527         31.50         5000.0            9220.91           Y   \n",
       "65528         14.25         3360.0            9827.19           Y   \n",
       "65529         31.50         5200.0            9589.74           Y   \n",
       "65530         18.25         2600.0            7671.51           Y   \n",
       "65531           NaN            NaN                NaN           N   \n",
       "65532           NaN            NaN                NaN           N   \n",
       "65533         31.50         1800.0            3894.96           N   \n",
       "65534           NaN            NaN                NaN           N   \n",
       "\n",
       "       Device_Type  Var2  Source  Var4 LoggedIn Disbursed  \n",
       "0      Web-browser     G    S122     1        0       0.0  \n",
       "1      Web-browser     G    S122     3        0       0.0  \n",
       "2      Web-browser     B    S143     1        0       0.0  \n",
       "3      Web-browser     B    S143     3        0       0.0  \n",
       "4      Web-browser     B    S134     3        1       0.0  \n",
       "5      Web-browser     B    S143     3        1       0.0  \n",
       "6      Web-browser     B    S133     1        0       0.0  \n",
       "7      Web-browser     B    S159     3        0       0.0  \n",
       "8           Mobile     C    S122     5        0       0.0  \n",
       "9      Web-browser     B    S133     1        0       0.0  \n",
       "10     Web-browser     B    S133     4        0       0.0  \n",
       "11     Web-browser     B    S122     1        0       0.0  \n",
       "12     Web-browser     B    S133     1        0       0.0  \n",
       "13     Web-browser     B    S133     4        0       0.0  \n",
       "14     Web-browser     B    S151     1        0       0.0  \n",
       "15     Web-browser     B    S159     3        0       0.0  \n",
       "16     Web-browser     E    S122     1        1       0.0  \n",
       "17     Web-browser     B    S122     1        0       0.0  \n",
       "18     Web-browser     E    S133     1        0       0.0  \n",
       "19     Web-browser     E    S133     3        0       0.0  \n",
       "20          Mobile     F    S133     2        0       0.0  \n",
       "21          Mobile     C    S133     1        0       0.0  \n",
       "22          Mobile     C    S133     1        0       0.0  \n",
       "23     Web-browser     E    S133     3        0       0.0  \n",
       "24     Web-browser     E    S133     3        0       0.0  \n",
       "25     Web-browser     B    S133     3        0       0.0  \n",
       "26     Web-browser     E    S133     4        0       0.0  \n",
       "27          Mobile     C    S133     1        0       0.0  \n",
       "28     Web-browser     B    S133     1        0       0.0  \n",
       "29     Web-browser     B    S159     2        0       0.0  \n",
       "...            ...   ...     ...   ...      ...       ...  \n",
       "65505  Web-browser     G    S122     4        0       0.0  \n",
       "65506  Web-browser     G    S122     2        0       0.0  \n",
       "65507       Mobile     G    S122     5        0       0.0  \n",
       "65508       Mobile     G    S122     5        0       0.0  \n",
       "65509       Mobile     G    S122     5        0       0.0  \n",
       "65510       Mobile     G    S122     3        0       0.0  \n",
       "65511       Mobile     G    S122     1        0       0.0  \n",
       "65512  Web-browser     G    S122     3        0       0.0  \n",
       "65513       Mobile     G    S122     5        0       0.0  \n",
       "65514  Web-browser     G    S122     1        0       0.0  \n",
       "65515  Web-browser     G    S122     3        0       0.0  \n",
       "65516  Web-browser     G    S122     1        0       0.0  \n",
       "65517  Web-browser     G    S122     1        0       0.0  \n",
       "65518  Web-browser     G    S122     3        0       0.0  \n",
       "65519       Mobile     G    S122     1        0       0.0  \n",
       "65520       Mobile     G    S122     5        0       0.0  \n",
       "65521  Web-browser     G    S122     3        0       0.0  \n",
       "65522       Mobile     G    S122     5        0       0.0  \n",
       "65523       Mobile     G    S122     5        0       0.0  \n",
       "65524       Mobile     G    S122     1        0       0.0  \n",
       "65525       Mobile     G    S122     5        0       0.0  \n",
       "65526  Web-browser     G    S122     7        0       0.0  \n",
       "65527       Mobile     G    S122     5        0       0.0  \n",
       "65528       Mobile     G    S122     5        0       0.0  \n",
       "65529       Mobile     G    S122     5        0       0.0  \n",
       "65530       Mobile     G    S122     5        0       0.0  \n",
       "65531  Web-browser     G    S122     1        0       0.0  \n",
       "65532  Web-browser     G    S122     3        0       0.0  \n",
       "65533  Web-browser     G    S122     4        0       0.0  \n",
       "65534  Web-browser     G    S122     3        0       0.0  \n",
       "\n",
       "[65535 rows x 26 columns]>"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.info"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAEKCAYAAADaa8itAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAGalJREFUeJzt3X+0XWV95/H3B/BHqmBAgmUICGqqpVoVUqClY61UDLQadHTEaSVDmabjwtaOrRWtq7Fal7hasbJqsVQixNWKiKOmimYi/pp2RAlKQUAXGWolC5TYID8VB/zOH+e5cJKce+9Oss+9HO/7tdZZ5+zvefbe38O65Lue/Tz72akqJEnqw17znYAk6SeHRUWS1BuLiiSpNxYVSVJvLCqSpN5YVCRJvbGoSJJ6Y1GRJPXGoiJJ6s0+853AXDvwwAPr8MMPn+80JGliXHXVVd+rqiVd2i64onL44YezadOm+U5DkiZGkn/r2tbLX5Kk3lhUJEm9sahIknpjUZEk9caiIknqjUVFktQbi4okqTcWFUlSbywqkqTeLLg76vfU0a9bN98p6GHoqr84bb5TkB4W7KlIknpjUZEk9caiIknqjUVFktQbi4okqTcWFUlSbywqkqTeWFQkSb2xqEiSemNRkST1ZqxFJcniJJcm+UaSG5L8YpIDkmxMcmN737+1TZJzk2xOck2So4aOs6q1vzHJqqH40UmubfucmyTj/D2SpJmNu6fybuDTVfU04JnADcBZwOVVtQy4vG0DnAQsa6/VwHkASQ4A1gDHAscAa6YKUWuzemi/FWP+PZKkGYytqCTZD3gOcAFAVf2oqr4PrAQuas0uAk5pn1cC62rgCmBxkoOBFwAbq2pbVd0ObARWtO/2q6ovVVUB64aOJUmaB+PsqTwJ2Aq8P8nXkrwvyWOAJ1TVrQDt/aDW/hDg5qH9t7TYTPEtI+I7SbI6yaYkm7Zu3brnv0ySNNI4i8o+wFHAeVX1bOAeHrrUNcqo8ZDajfjOwarzq2p5VS1fsmTJzFlLknbbOIvKFmBLVX25bV/KoMh8t126or3fNtT+0KH9lwK3zBJfOiIuSZonYysqVfUd4OYkT22hE4DrgfXA1AyuVcDH2+f1wGltFthxwB3t8tgG4MQk+7cB+hOBDe27u5Ic12Z9nTZ0LEnSPBj3kx9/D/j7JI8EbgJOZ1DILklyBvBt4GWt7WXAycBm4N7WlqraluStwJWt3Vuqalv7/CrgQmAR8Kn2kiTNk7EWlaq6Glg+4qsTRrQt4MxpjrMWWDsivgl4+h6mKUnqiXfUS5J6Y1GRJPXGoiJJ6o1FRZLUG4uKJKk3FhVJUm8sKpKk3lhUJEm9sahIknpjUZEk9caiIknqjUVFktQbi4okqTcWFUlSbywqkqTeWFQkSb2xqEiSemNRkST1xqIiSeqNRUWS1JtZi0qS45M8pn3+rSTnJHni+FOTJE2aLj2V84B7kzwT+GPg34B1Y81KkjSRuhSV+6uqgJXAu6vq3cC+401LkjSJuhSVu5K8AXgl8MkkewOP6HLwJN9Kcm2Sq5NsarEDkmxMcmN737/Fk+TcJJuTXJPkqKHjrGrtb0yyaih+dDv+5rZvduXHS5L61aWovBy4D/jtqvoOcAjwF7twjl+tqmdV1fK2fRZweVUtAy5v2wAnAcvaazWDy24kOQBYAxwLHAOsmSpErc3qof1W7EJekqSezVpUWiH5CPCoFvoe8NE9OOdK4KL2+SLglKH4uhq4Alic5GDgBcDGqtpWVbcDG4EV7bv9qupL7fLcuqFjSZLmQZfZX78DXAr8bQsdAnys4/EL+F9JrkqyusWeUFW3ArT3g4aOe/PQvltabKb4lhHxUb9hdZJNSTZt3bq1Y+qSpF21T4c2ZzK47PRlgKq6MclBM+/yoOOr6pbWfmOSb8zQdtR4SO1GfOdg1fnA+QDLly8f2UaStOe6jKnc
V1U/mtpIsg/T/OO9o6q6pb3fxuCS2THAd9ulK9r7ba35FuDQod2XArfMEl86Ii5JmiddisoXkrwRWJTk+cCHgX+cbackj0my79Rn4ETg68B6YGoG1yrg4+3zeuC0NgvsOOCOdnlsA3Bikv3bAP2JwIb23V1Jjmuzvk4bOpYkaR50ufx1FnAGcC3wu8BlwPs67PcE4KNtlu8+wD9U1aeTXAlckuQM4NvAy1r7y4CTgc3AvcDpAFW1LclbgStbu7dU1bb2+VXAhcAi4FPtJUmaJ12KyiJgbVX9HUC7T2URg3/4p1VVNwHPHBH/d+CEEfFiMH4z6lhrgbUj4puAp8/+EyRJc6HL5a/LGRSRKYuAz4wnHUnSJOtSVB5dVXdPbbTPPzW+lCRJk6pLUblnhyVTjgZ+ML6UJEmTqsuYyh8AH04yNV33YAZLt0iStJ1Zi0pVXZnkacBTGdxw+I2q+n9jz0ySNHG69FQAfgE4vLV/dhKqymeqSJK2M2tRSfIB4MnA1cADLTy1gKMkSQ/q0lNZDhzZ7iORJGlaXWZ/fR346XEnIkmafF16KgcC1yf5CoOHdQFQVS8aW1aSpInUpai8edxJSJJ+MnSZUvyFJE8EllXVZ5L8FLD3+FOTJE2acT/5UZK0gHQZqD8TOB64EwZPfuShRwBLkvSgsT75UZK0sIztyY+SpIWnS1E5C9jK9k9+fNM4k5IkTaYZZ3+1pzxeVFW/Bfzd3KQkSZpUM/ZUquoBYEmSR85RPpKkCdbl5sdvAf+cZD1wz1Swqs4ZV1KSpMnUpajc0l57AfuONx1J0iTrMqby2Kp63RzlI0maYF3GVI6aqY0kSVO6TCm+Osn6JK9M8pKpV9cTJNk7ydeSfKJtH5Hky0luTPKhqUkASR7Vtje37w8fOsYbWvybSV4wFF/RYpuTnNX5V0uSxqJLUTkA+HfgecAL2+s3duEcrwFuGNp+B/CuqloG3A6c0eJnALdX1VOAd7V2JDkSOBX4OWAF8DetUO0NvAc4CTgSeEVrK0maJ11WKT59dw+eZCnw68DbgNcmCYPi9F9ak4sYLK1/HrCSh5bZvxT469Z+JXBxVd0H/GuSzcAxrd3mqrqpnevi1vb63c1XkrRnujyj/v2MWOurqn67w/H/CvhjHpo19njg+1V1f9vewmDVY9r7ze3Y9ye5o7U/BLhi6JjD+9y8Q/zYaX7DamA1wGGHHdYhbUnS7uhy+esTwCfb63JgP+Du2XZK8hvAbVV11XB4RNOa5btdje8crDq/qpZX1fIlS5bMkLUkaU90ufz1keHtJB8EPtPh2McDL0pyMvBoBsXor4DFSfZpvZWlDO6BgUFP41BgS1sJ+XHAtqH4lOF9potLkuZBl57KjpYBs15Dqqo3VNXSqjqcwUD7Z6vqN4HPAS9tzVYBH2+f17dt2vefrapq8VPb7LAj2vm/AlwJLGuzyR7ZzrF+N36PJKknXcZU7mL7y0rfAV6/B+d8PXBxkj8HvgZc0OIXAB9oA/HbGBQJquq6JJcwGIC/Hziz3T9DklcDGxg83nhtVV23B3lJkvZQl8tfe7w0S1V9Hvh8+3wTD83eGm7zQ+Bl0+z/NgYzyHaMX8ZgKX5J0sNAl2fUvzjJ44a2Fyc5ZbxpSZImUZcxlTVVdcfURlV9H1gzvpQkSZOqS1EZ1abL6saSpAWmS1HZlOScJE9O8qQk7wKumnUvSdKC06Wo/B7wI+BDwCXAD4Azx5mUJGkydZn9dQ/gCsCSpFl1mf21Mcnioe39k2wYb1qSpEnU5fLXgW3GFwBVdTtw0PhSkiRNqi5F5cdJHlyWJckTmWbhRknSwtZlavCfAP+U5Att+zm0ZeQlSRrWZaD+00mOAo5rof9RVd8bb1qSpEnU9SbGX2LQQ5nyiTHkIkmacF1mf53N4Dnz17fXa5K8fdyJSZImT5eeysnAs6rqxwBJLmKwZP0bxpmYJGnydH1I1+Khz4+btpUkaUHr0lN5O/C1JJ9j8Fz4
52AvRZI0QpfZXx9M8nngFxgUlddX1XfGnZgkafJ0mv1VVbfi898lSbPoOqYiSdKspi0qSY6Yy0QkSZNvpp7KpQBJLp+jXCRJE26mMZW9kqwBfibJa3f8sqrOGV9akqRJNFNP5VTghwwKz74jXpIkbWfankpVfRN4R5JrqupTu3rgJI8Gvgg8qp3n0qpa08ZqLgYOAL4KvLKqfpTkUcA64Gjg34GXV9W32rHeAJwBPAD8flVtaPEVwLuBvYH3VdXZu5qnJKk/XWZ//Z8k5yTZ1F7vTNLlrvr7gOdV1TOBZwErkhwHvAN4V1UtA25nUCxo77dX1VOAd7V2JDmSQa/p54AVwN8k2TvJ3sB7gJOAI4FXtLaSpHnSpaisBe4C/nN73Qm8f7adauDutvmI9irgebRJAMBFwCnt88q2Tfv+hCRp8Yur6r6q+ldgM3BMe22uqpuq6kcMej8rO/weSdKYdLn58clV9Z+Gtv8sydVdDt56E1cBT2HQq/i/wPer6v7WZAtwSPt8CHAzQFXdn+QO4PEtfsXQYYf3uXmH+LFd8pIkjUeXnsoPkvzy1EaS44EfdDl4VT1QVc8CljLoWfzsqGZTh57mu12N7yTJ6qnLd1u3bp09cUnSbunSU/nvwLqhcZTbgVW7cpKq+n5bP+w4YHGSfVpvZSlwS2u2BTgU2JJkHwarIW8bik8Z3me6+I7nPx84H2D58uUjC48kac/N2lOpqn9pg+0/D/x8VT27qq6Zbb8kS5Isbp8XAb8G3AB8Dnhpa7YK+Hj7vJ6HitVLgc9WVbX4qUke1WaOLQO+AlwJLEtyRJJHMhjMd30ySZpHXR8nTFXduYvHPhi4qI2r7AVcUlWfSHI9cHGSP2fwsK8LWvsLgA8k2cygh3JqO+91SS5h8NTJ+4Ezq+oBgCSvBjYwmFK8tqqu28UcJUk96lxUdlXrzTx7RPwmBuMrO8Z/CLxsmmO9DXjbiPhlwGV7nKwkqReuUixJ6k2nnkqSXwIOH25fVevGlJMkaULNWlSSfAB4MnA1g2VSYDB116IiSdpOl57KcuDINhNLkqRpdRlT+Trw0+NORJI0+br0VA4Erk/yFQaLRAJQVS8aW1aSpInUpai8edxJSJJ+MsxaVKrqC0meCCyrqs8k+SkGNxtKkrSdWcdUkvwOg6Xo/7aFDgE+Ns6kJEmTqctA/ZnA8Qyeo0JV3QgcNM6kJEmTqUtRua89BAuAtoKw04slSTvpUlS+kOSNwKIkzwc+DPzjeNOSJE2iLkXlLGArcC3wuwwWcHzTOJOSJE2mLrO/fpzkIuDLDC57fdO76yVJo3RZ++vXgfcyeL58gCOS/G5VfWrcyUmSJkuXmx/fCfxqVW0GSPJk4JOARUWStJ0uYyq3TRWU5ibgtjHlI0maYNP2VJK8pH28LsllwCUMxlRexuD58JIkbWemy18vHPr8XeBX2uetwP5jy0iSNLGmLSpVdfpcJiJJmnxdZn8dAfweOz9O2KXvJUnb6TL762PABQzuov/xeNORJE2yLkXlh1V17tgzkSRNvC5Tit+dZE2SX0xy1NRrtp2SHJrkc0luSHJdkte0+AFJNia5sb3v3+JJcm6SzUmuGT5HklWt/Y1JVg3Fj05ybdvn3CTZjf8GkqSedOmpPAN4JfA8Hrr8VW17JvcDf1hVX02yL3BVko3AfwUur6qzk5zFYG2x1wMnAcva61jgPODYJAcAa4Dl7bxXJVlfVbe3NquBKxisSbYCb8qUpHnTpai8GHjS8PL3XVTVrcCt7fNdSW5g8ICvlcBzW7OLgM8zKCorgXVtXbErkixOcnBru7GqtgG0wrQiyeeB/arqSy2+DjgFi4okzZsul7/+BVi8JydJcjjwbAaLUj6hFZypwjP1wK9DgJuHdtvSYjPFt4yIS5LmSZeeyhOAbyS5ErhvKth1SnGSxwIfAf6gqu6cYdhj1Be1G/FROaxmcJmMww47bLaUJUm7qUtRWbO7B0/yCAYF5e+r6n+2
8HeTHFxVt7bLW1PriG0BDh3afSlwS4s/d4f451t86Yj2O6mq84HzAZYvX+6y/ZI0JrNe/qqqL4x6zbZfm4l1AXBDVZ0z9NV6YGoG1yrg40Px09ossOOAO9rlsQ3AiUn2bzPFTgQ2tO/uSnJcO9dpQ8eSJM2DLnfU38VDl5UeCTwCuKeq9ptl1+MZzBq7NsnVLfZG4GzgkiRnAN9msEAlDGZvnQxsBu4FTgeoqm1J3spDi1i+ZWrQHngVcCGwiMEAvYP0kjSPujz5cd/h7SSnAMd02O+fGD3uAXDCiPYFnDnNsdYCa0fENwFPny0XSdLc6DL7aztV9TFmv0dFkrQAdbn89ZKhzb146CZESZK202X21/BzVe4HvsXgRkVJkrbTZUzF56pIkjqZ6XHCfzrDflVVbx1DPpKkCTZTT+WeEbHHAGcAjwcsKpKk7cz0OOF3Tn1uqwy/hsG9IxcD75xuP0nSwjXjmEpbdv61wG8yWFH4qLbkvCRJO5lpTOUvgJcwWDPrGVV195xlJUmaSDPd/PiHwH8A3gTckuTO9roryZ1zk54kaZLMNKayy3fbS5IWNguHJKk3FhVJUm8sKpKk3lhUJEm9sahIknpjUZEk9caiIknqjUVFktQbi4okqTcWFUlSbywqkqTeWFQkSb0ZW1FJsjbJbUm+PhQ7IMnGJDe29/1bPEnOTbI5yTVJjhraZ1Vrf2OSVUPxo5Nc2/Y5N0nG9VskSd2Ms6dyIbBih9hZwOVVtQy4vG0DnAQsa6/VwHnw4EPC1gDHAscAa6YKUWuzemi/Hc8lSZpjYysqVfVFYNsO4ZUMniBJez9lKL6uBq4AFic5GHgBsLGqtrUnTm4EVrTv9quqL1VVAeuGjiVJmidzPabyhKq6FaC9H9TihwA3D7Xb0mIzxbeMiEuS5tHDZaB+1HhI7UZ89MGT1Uk2Jdm0devW3UxRkjSbuS4q322Xrmjvt7X4FuDQoXZLgVtmiS8dER+pqs6vquVVtXzJkiV7/CMkSaPNdVFZD0zN4FoFfHwoflqbBXYccEe7PLYBODHJ/m2A/kRgQ/vuriTHtVlfpw0dS5I0T6Z9Rv2eSvJB4LnAgUm2MJjFdTZwSZIzgG8DL2vNLwNOBjYD9wKnA1TVtiRvBa5s7d5SVVOD/69iMMNsEfCp9pIkzaOxFZWqesU0X50wom0BZ05znLXA2hHxTcDT9yRHSVK/Hi4D9ZKknwAWFUlSbywqkqTeWFQkSb2xqEiSemNRkST1xqIiSeqNRUWS1BuLiiSpNxYVSVJvLCqSpN5YVCRJvbGoSJJ6Y1GRJPXGoiJJ6o1FRZLUG4uKJKk3FhVJUm8sKpKk3lhUJEm9sahIknpjUZEk9caiIknqzcQXlSQrknwzyeYkZ813PpK0kO0z3wnsiSR7A+8Bng9sAa5Msr6qrp/fzKT58e23PGO+U9DD0GF/eu2cnWvSeyrHAJur6qaq+hFwMbBynnOSpAVr0ovKIcDNQ9tbWkySNA8m+vIXkBGx2qlRshpY3TbvTvLNsWa1cBwIfG++k3g4yF+umu8UtDP/PqesGfVP5S55YteGk15UtgCHDm0vBW7ZsVFVnQ+cP1dJLRRJNlXV8vnOQxrFv8/5MemXv64EliU5IskjgVOB9fOckyQtWBPdU6mq+5O8GtgA7A2srarr5jktSVqwJrqoAFTVZcBl853HAuUlRT2c+fc5D1K107i2JEm7ZdLHVCRJDyMWFc1qtqVwkjwqyYfa919OcvjcZ6mFKMnaJLcl+fo03yfJue1v85okR811jguNRUUzGloK5yTgSOAVSY7codkZwO1V9RTgXcA75jZLLWAXAitm+P4kYFl7rQbOm4OcFjSLimbTZSmclcBF7fOlwAlJ9vhuK2k2VfVFYNsMTVYC62rgCmBxkoPnJruFyaKi2XRZCufBNlV1P3AH8Pg5yU6amUs5zTGLimbTZSmcTsvlSPPAv805ZlHRbLoshfNgmyT7AI9j5ksS0lzp
tJST+mNR0Wy6LIWzHphaUfGlwGfLG6D08LAeOK3NAjsOuKOqbp3vpH6STfwd9Rqv6ZbCSfIWYFNVrQcuAD6QZDODHsqp85exFpIkHwSeCxyYZAuwBngEQFW9l8FqGycDm4F7gdPnJ9OFwzvqJUm98fKXJKk3FhVJUm8sKpKk3lhUJEm9sahIknpjUZE6SvJAkquTXJfkX5K8Nsle7bvlSc6dYd/nJvnE3GW70/nfnOSP5uv8Wji8T0Xq7gdV9SyAJAcB/8Bg9YA1VbUJ2DSuEyfZp62rJj2s2VORdkNV3cZgKfVXt7u1H+yJJPmV1qO5OsnXkuzbdtsvyUeTXJ/kvUO9nLunjpvkpUkubJ8vTHJOks8B75juuElel+TK9ryQPxs61p+05+B8BnjqXPx3keypSLupqm5qheGgHb76I+DMqvrnJI8FftjixzB4Js2/AZ8GXsLgUQEz+Rng16rqgST/uONxk5zI4FkhxzBYPHF9kucA9zBY2eDZDP4//ypw1Z79Yml29lSkPTNqFdx/Bs5J8vvA4qHLVl9pz6V5APgg8Msdjv/h1n66457YXl9jUDiexqDI/Efgo1V1b1Xdyc7rtUljYVGRdlOSJwEPALcNx6vqbOC/AYuAK5I8beqrHQ5RI+KP3qHNPbMcN8Dbq+pZ7fWUqrpgmvNJY2dRkXZDkiXAe4G/3nFF5iRPrqprq+odDAbvp4rKMW21572AlwP/1OLfTfKzLf7iGc456rgbgN9ul8NIckibRPBF4MVJFrWxlxf29dulmTimInW3KMnVDFbBvR/4AHDOiHZ/kORXGfRirgc+Bfwi8CXgbOAZDP7R/2hrfxbwCQZPKPw68Nhpzr/TcavqviQ/C3ypPcH5buC3quqrST4EXM1gDOd/78kPl7pylWJJUm+8/CVJ6o1FRZLUG4uKJKk3FhVJUm8sKpKk3lhUJEm9sahIknpjUZEk9eb/A4Dok8rXtUwYAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import seaborn as sns\n",
    "\n",
    "# Class balance of the target: number of rows for each value of train['Disbursed'].\n",
    "# The column is passed by keyword because seaborn 0.12+ makes x/y keyword-only\n",
    "# (a positional first argument is interpreted as `data`, not as `x`).\n",
    "ax = sns.countplot(x='Disbursed', data=train)\n",
    "ax.set(xlabel='Disbursed', ylabel='Number of occurrences');\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Monthly_Income</th>\n",
       "      <th>Loan_Amount_Applied</th>\n",
       "      <th>Loan_Tenure_Applied</th>\n",
       "      <th>Existing_EMI</th>\n",
       "      <th>Loan_Amount_Submitted</th>\n",
       "      <th>Loan_Tenure_Submitted</th>\n",
       "      <th>Interest_Rate</th>\n",
       "      <th>Processing_Fee</th>\n",
       "      <th>Var4</th>\n",
       "      <th>LoggedIn</th>\n",
       "      <th>Disbursed</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>6.553500e+04</td>\n",
       "      <td>6.552600e+04</td>\n",
       "      <td>65526.000000</td>\n",
       "      <td>6.552600e+04</td>\n",
       "      <td>4.027300e+04</td>\n",
       "      <td>40273.000000</td>\n",
       "      <td>21144.000000</td>\n",
       "      <td>20848.000000</td>\n",
       "      <td>65535.000000</td>\n",
       "      <td>65535.000000</td>\n",
       "      <td>65534.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>6.128629e+04</td>\n",
       "      <td>2.297530e+05</td>\n",
       "      <td>2.171825</td>\n",
       "      <td>3.615571e+03</td>\n",
       "      <td>3.942977e+05</td>\n",
       "      <td>3.944454</td>\n",
       "      <td>18.828438</td>\n",
       "      <td>5159.978655</td>\n",
       "      <td>2.893874</td>\n",
       "      <td>0.030777</td>\n",
       "      <td>0.015442</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>2.442714e+06</td>\n",
       "      <td>3.510082e+05</td>\n",
       "      <td>2.007108</td>\n",
       "      <td>2.346970e+04</td>\n",
       "      <td>3.032666e+05</td>\n",
       "      <td>1.195076</td>\n",
       "      <td>5.102259</td>\n",
       "      <td>4749.756630</td>\n",
       "      <td>1.636426</td>\n",
       "      <td>0.172716</td>\n",
       "      <td>0.123305</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>5.000000e+04</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>11.990000</td>\n",
       "      <td>200.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.630000e+04</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>2.000000e+05</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>15.250000</td>\n",
       "      <td>2000.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>2.500000e+04</td>\n",
       "      <td>1.000000e+05</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>3.000000e+05</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>18.150000</td>\n",
       "      <td>3900.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>3.950000e+04</td>\n",
       "      <td>3.000000e+05</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>3.500000e+03</td>\n",
       "      <td>5.000000e+05</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>20.000000</td>\n",
       "      <td>6336.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>4.445544e+08</td>\n",
       "      <td>9.000000e+06</td>\n",
       "      <td>10.000000</td>\n",
       "      <td>5.454365e+06</td>\n",
       "      <td>3.000000e+06</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>37.000000</td>\n",
       "      <td>50000.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       Monthly_Income  Loan_Amount_Applied  Loan_Tenure_Applied  Existing_EMI  \\\n",
       "count    6.553500e+04         6.552600e+04         65526.000000  6.552600e+04   \n",
       "mean     6.128629e+04         2.297530e+05             2.171825  3.615571e+03   \n",
       "std      2.442714e+06         3.510082e+05             2.007108  2.346970e+04   \n",
       "min      0.000000e+00         0.000000e+00             0.000000  0.000000e+00   \n",
       "25%      1.630000e+04         0.000000e+00             0.000000  0.000000e+00   \n",
       "50%      2.500000e+04         1.000000e+05             2.000000  0.000000e+00   \n",
       "75%      3.950000e+04         3.000000e+05             4.000000  3.500000e+03   \n",
       "max      4.445544e+08         9.000000e+06            10.000000  5.454365e+06   \n",
       "\n",
       "       Loan_Amount_Submitted  Loan_Tenure_Submitted  Interest_Rate  \\\n",
       "count           4.027300e+04           40273.000000   21144.000000   \n",
       "mean            3.942977e+05               3.944454      18.828438   \n",
       "std             3.032666e+05               1.195076       5.102259   \n",
       "min             5.000000e+04               1.000000      11.990000   \n",
       "25%             2.000000e+05               3.000000      15.250000   \n",
       "50%             3.000000e+05               4.000000      18.150000   \n",
       "75%             5.000000e+05               5.000000      20.000000   \n",
       "max             3.000000e+06               6.000000      37.000000   \n",
       "\n",
       "       Processing_Fee          Var4      LoggedIn     Disbursed  \n",
       "count    20848.000000  65535.000000  65535.000000  65534.000000  \n",
       "mean      5159.978655      2.893874      0.030777      0.015442  \n",
       "std       4749.756630      1.636426      0.172716      0.123305  \n",
       "min        200.000000      0.000000      0.000000      0.000000  \n",
       "25%       2000.000000      1.000000      0.000000      0.000000  \n",
       "50%       3900.000000      3.000000      0.000000      0.000000  \n",
       "75%       6336.000000      5.000000      0.000000      0.000000  \n",
       "max      50000.000000      7.000000      1.000000      1.000000  "
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics of train via DataFrame.describe(); the recorded output\n",
    "# lists count, mean, std, min, quartiles and max for each numeric column.\n",
    "train.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Program Files (x86)\\Microsoft Visual Studio\\Shared\\Anaconda3_64\\lib\\site-packages\\ipykernel_launcher.py:3: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n",
      "of pandas will change to not sort by default.\n",
      "\n",
      "To accept the future behavior, pass 'sort=True'.\n",
      "\n",
      "To retain the current behavior and silence the warning, pass sort=False\n",
      "\n",
      "  This is separate from the ipykernel package so we can avoid doing imports until\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(103252, 27)"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Tag every row with the frame it came from before stacking the two frames.\n",
    "train['source']='train'\n",
    "test['source']='test'\n",
    "\n",
    "# The column sets of train and test are not aligned (pandas warned about this),\n",
    "# so the result's column order would depend on the pandas version. sort=True\n",
    "# pins the alphabetical column order that older pandas applied implicitly and\n",
    "# silences the FutureWarning.\n",
    "data=pd.concat([train,test],ignore_index=True,sort=True)\n",
    "data.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "City                       408\n",
       "DOB                          0\n",
       "Device_Type                  0\n",
       "Disbursed                37718\n",
       "EMI_Loan_Submitted       69997\n",
       "Employer_Name               51\n",
       "Existing_EMI                49\n",
       "Filled_Form                  0\n",
       "Gender                       0\n",
       "Interest_Rate            69998\n",
       "Lead_Creation_Date           0\n",
       "Loan_Amount_Applied         49\n",
       "Loan_Amount_Submitted    40184\n",
       "Loan_Tenure_Applied         49\n",
       "Loan_Tenure_Submitted    40184\n",
       "LoggedIn                 37717\n",
       "Mobile_Verified              0\n",
       "Monthly_Income               0\n",
       "Processing_Fee           70433\n",
       "Salary_Account           12997\n",
       "Source                       0\n",
       "Var1                         1\n",
       "Var2                         0\n",
       "Var4                         0\n",
       "Var5                         0\n",
       "id                           0\n",
       "source                       0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of missing values per column. Vectorised equivalent of applying a\n",
    "# Python-level sum over each column's isnull() mask.\n",
    "data.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Gender属性有2的不同取值，各取值及其出现的次数\n",
      "\n",
      "Male      59561\n",
      "Female    43691\n",
      "Name: Gender, dtype: int64\n",
      "\n",
      "City属性有189的不同取值，各取值及其出现的次数\n",
      "\n",
      "Delhi                       15068\n",
      "Mumbai                      12813\n",
      "Bengaluru                   12704\n",
      "Hyderabad                    8605\n",
      "Chennai                      8069\n",
      "Pune                         6145\n",
      "others                       4428\n",
      "Kolkata                      3559\n",
      "Ahmedabad                    2109\n",
      "Jaipur                       1616\n",
      "Gurgaon                      1468\n",
      "Coimbatore                   1359\n",
      "Thane                        1086\n",
      "Chandigarh                   1069\n",
      "Surat                         985\n",
      "Visakhapatnam                 899\n",
      "Indore                        896\n",
      "Nagpur                        761\n",
      "Vadodara                      746\n",
      "Lucknow                       669\n",
      "Ghaziabad                     667\n",
      "Bhopal                        637\n",
      "Faridabad                     573\n",
      "Patna                         571\n",
      "Kochi                         566\n",
      "Noida                         458\n",
      "Madurai                       449\n",
      "Gautam Buddha Nagar           398\n",
      "Raipur                        379\n",
      "Dehradun                      378\n",
      "                            ...  \n",
      "Valsad                         37\n",
      "Rudrapur                       37\n",
      "Bhilwara                       36\n",
      "Nalgonda                       36\n",
      "Adilabad                       36\n",
      "Bikaner                        36\n",
      "Cuddalore                      35\n",
      "Sambalpur                      35\n",
      "Sikar                          34\n",
      "Jalpaiguri                     34\n",
      "Mathura                        34\n",
      "Sirsa                          34\n",
      "Muzaffarpur                    34\n",
      "Nagercoil                      33\n",
      "Midnapore West                 33\n",
      "Gulbarga                       33\n",
      "Chandrapur                     32\n",
      "Haldwani                       32\n",
      "Srikakulam                     32\n",
      "Virudhunagar                   32\n",
      "Ponda                          31\n",
      "Bahadurgarh                    31\n",
      "Karur                          31\n",
      "Agartala                       31\n",
      "Jhajjar                        31\n",
      "Udupi and Uttara Kannada       31\n",
      "Jhunjhunu                      30\n",
      "Durg                           30\n",
      "Nanded                         30\n",
      "Kannur                         30\n",
      "Name: City, Length: 188, dtype: int64\n",
      "\n",
      "Employer_Name属性有125的不同取值，各取值及其出现的次数\n",
      "\n",
      "others                                          86573\n",
      "0                                                4871\n",
      "TATA CONSULTANCY SERVICES LTD (TCS)               626\n",
      "COGNIZANT TECHNOLOGY SOLUTIONS INDIA PVT LTD      416\n",
      "ACCENTURE SERVICES PVT LTD                        396\n",
      "GOOGLE                                            350\n",
      "HCL TECHNOLOGIES LTD                              283\n",
      "ICICI BANK LTD                                    274\n",
      "IBM CORPORATION                                   225\n",
      "INFOSYS TECHNOLOGIES                              215\n",
      "INDIAN AIR FORCE                                  206\n",
      "INDIAN ARMY                                       205\n",
      "GENPACT                                           195\n",
      "WIPRO TECHNOLOGIES                                182\n",
      "ARMY                                              178\n",
      "HDFC BANK LTD                                     176\n",
      "STATE GOVERNMENT                                  171\n",
      "IKYA HUMAN CAPITAL SOLUTIONS LTD                  160\n",
      "TYPE SLOWLY FOR AUTO FILL                         158\n",
      "INDIAN RAILWAY                                    156\n",
      "WIPRO BPO                                         153\n",
      "INDIAN NAVY                                       145\n",
      "TECH MAHINDRA LTD                                 138\n",
      "CONVERGYS INDIA SERVICES PVT LTD                  134\n",
      "CONCENTRIX DAKSH SERVICES INDIA PVT LTD           129\n",
      "OTHERS                                            126\n",
      "IBM GLOBAL SERVICES INDIA LTD                     124\n",
      "ADECCO INDIA PVT LTD                              121\n",
      "CAPGEMINI INDIA PVT LTD                           121\n",
      "SUTHERLAND GLOBAL SERVICES PVT LTD                117\n",
      "                                                ...  \n",
      "HDFC STANDARD LIFE INSURANCE COMPANY LTD           49\n",
      "STATE BANK OF INDIA                                49\n",
      "JET AIRWAYS INDIA LTD                              48\n",
      "YES BANK LTD                                       48\n",
      "AMERICAN EXPRESS INDIA LTD                         48\n",
      "KOTAK LIFE INSURANCE                               48\n",
      "RELIANCE INDUSTRIES LTD                            48\n",
      "BHARTI AIRTEL LTD                                  47\n",
      "INTERGLOBE TECHNOLOGIES (IGT)                      47\n",
      "BARCLAYS SHARED SERVICES PVT LTD                   47\n",
      "MINDTREE LTD                                       47\n",
      "CENTRAL GOVERNMENT DEFENCE                         47\n",
      "G4S SECURE SOLUTIONS INDIA PVT LTD                 47\n",
      "SHRIRAM TRANSPORT FINANCE COMPANY LTD              46\n",
      "AXIS SECURITIES LTD                                46\n",
      "HBL GLOBAL PVT LTD                                 45\n",
      "IBM INDIA PVT LTD                                  45\n",
      "EXL SERVICE.COM INDIA PVT LTD                      45\n",
      "IENERGIZER IT SERVICES PVT LTD                     44\n",
      "IGATE GLOBAL SOLUTIONS LTD                         44\n",
      "ALTISOURCE BUSINESS SOLUTIONS PVT                  44\n",
      "AFCONS INFRASTRUCTURE LTD                          44\n",
      "GOVT OF INDIA                                      43\n",
      "MAGNA INFOTECH PVT LTD                             42\n",
      "VODAFONE                                           42\n",
      "DELOITTE CONSULTING INDIA PVT LTD                  41\n",
      "ECLERX SERVICES                                    41\n",
      "FUTURE RETAIL LTD                                  41\n",
      "LOBO STAFFING SOLUTIONS PVT LTD                    40\n",
      "NOKIA INDIA SALES PVT LTD                          40\n",
      "Name: Employer_Name, Length: 124, dtype: int64\n",
      "\n",
      "Salary_Account属性有50的不同取值，各取值及其出现的次数\n",
      "\n",
      "HDFC Bank                         20893\n",
      "ICICI Bank                        16286\n",
      "State Bank of India               14639\n",
      "Axis Bank                         10434\n",
      "Citibank                           2794\n",
      "Kotak Bank                         2438\n",
      "IDBI Bank                          1888\n",
      "Punjab National Bank               1488\n",
      "Bank of India                      1423\n",
      "Bank of Baroda                     1419\n",
      "Standard Chartered Bank            1203\n",
      "Canara Bank                        1144\n",
      "Union Bank of India                1099\n",
      "Yes Bank                            911\n",
      "ING Vysya                           847\n",
      "Corporation bank                    829\n",
      "Indian Overseas Bank                720\n",
      "State Bank of Hyderabad             713\n",
      "Indian Bank                         656\n",
      "Oriental Bank of Commerce           651\n",
      "Andhra Bank                         620\n",
      "IndusInd Bank                       598\n",
      "Central Bank of India               531\n",
      "Syndicate Bank                      514\n",
      "Bank of Maharasthra                 475\n",
      "HSBC                                387\n",
      "State Bank of Bikaner & Jaipur      380\n",
      "Karur Vysya Bank                    338\n",
      "State Bank of Mysore                327\n",
      "Federal Bank                        314\n",
      "Allahabad Bank                      300\n",
      "UCO Bank                            296\n",
      "Vijaya Bank                         291\n",
      "State Bank of Travancore            277\n",
      "Karnataka Bank                      231\n",
      "Saraswat Bank                       230\n",
      "United Bank of India                230\n",
      "Dena Bank                           230\n",
      "State Bank of Patiala               219\n",
      "South Indian Bank                   191\n",
      "Abhyuday Co-op Bank Ltd             146\n",
      "Deutsche Bank                       141\n",
      "The Ratnakar Bank Ltd                95\n",
      "others                               85\n",
      "Tamil Nadu Mercantile Bank           83\n",
      "Punjab & Sind bank                   70\n",
      "J&K Bank                             69\n",
      "Dhanalakshmi Bank Ltd                56\n",
      "Lakshmi Vilas bank                   56\n",
      "Name: Salary_Account, dtype: int64\n",
      "\n",
      "Mobile_Verified属性有3的不同取值，各取值及其出现的次数\n",
      "\n",
      "Y    67431\n",
      "N    35820\n",
      "0        1\n",
      "Name: Mobile_Verified, dtype: int64\n",
      "\n",
      "Var1属性有20的不同取值，各取值及其出现的次数\n",
      "\n",
      "HBXX    69997\n",
      "HBXC    11781\n",
      "HBXB     4418\n",
      "HAXA     3808\n",
      "HBXD     2563\n",
      "HAXB     2553\n",
      "HBXA     1919\n",
      "HAXC     1742\n",
      "HBXH     1182\n",
      "HCXF      873\n",
      "HAYT      600\n",
      "HAVC      529\n",
      "HAXM      344\n",
      "HCXD      286\n",
      "HCYS      253\n",
      "HVYS      187\n",
      "HAZD      124\n",
      "HCXG       85\n",
      "HAXF        7\n",
      "Name: Var1, dtype: int64\n",
      "\n",
      "Filled_Form属性有3的不同取值，各取值及其出现的次数\n",
      "\n",
      "N         79629\n",
      "Y         23622\n",
      "Mobile        1\n",
      "Name: Filled_Form, dtype: int64\n",
      "\n",
      "Var2属性有8的不同取值，各取值及其出现的次数\n",
      "\n",
      "B       53481\n",
      "G       25852\n",
      "C       20366\n",
      "E        1855\n",
      "D         918\n",
      "F         770\n",
      "A           9\n",
      "S122        1\n",
      "Name: Var2, dtype: int64\n",
      "\n",
      "Source属性有35的不同取值，各取值及其出现的次数\n",
      "\n",
      "S133    42900\n",
      "S122    33763\n",
      "S159     7999\n",
      "S143     6140\n",
      "S127     2804\n",
      "S137     2450\n",
      "S134     1900\n",
      "S161     1109\n",
      "S151     1018\n",
      "S157      929\n",
      "S153      705\n",
      "S144      447\n",
      "S156      432\n",
      "S158      294\n",
      "S123      112\n",
      "S141       83\n",
      "S162       60\n",
      "S124       43\n",
      "S150       19\n",
      "S160       11\n",
      "S138        5\n",
      "S155        5\n",
      "S136        5\n",
      "S139        4\n",
      "S129        4\n",
      "S135        2\n",
      "S131        1\n",
      "S154        1\n",
      "S130        1\n",
      "S132        1\n",
      "S142        1\n",
      "1           1\n",
      "S125        1\n",
      "S140        1\n",
      "S126        1\n",
      "Name: Source, dtype: int64\n",
      "\n",
      "Var4属性有8的不同取值，各取值及其出现的次数\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "3    28973\n",
      "1    28868\n",
      "5    24545\n",
      "2     8299\n",
      "4     7948\n",
      "0     2567\n",
      "7     1788\n",
      "6      264\n",
      "Name: Var4, dtype: int64\n"
     ]
    }
   ],
   "source": [
    "cat_features=['Gender','City','Employer_Name','Salary_Account','Mobile_Verified','Var1','Filled_Form','Var2','Source','Var4']\n",
    "for col in cat_features:\n",
    "    # Build the frequency table once, counting NaN as its own category, so the\n",
    "    # reported number of distinct values always equals the number of rows in the\n",
    "    # table printed right after it.\n",
    "    value_counts_col=data[col].value_counts(dropna=False)\n",
    "    num_values=len(value_counts_col)\n",
    "    # Message (Chinese): \"attribute <col> has <n> distinct values; each value and its count\".\n",
    "    print('\\n%s属性有%d的不同取值，各取值及其出现的次数\\n'% (col,num_values))\n",
    "    print (value_counts_col)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rebind cat_features to four columns only. The next cell iterates over this\n",
    "# list in order and pairs each entry with a rarity threshold by position.\n",
    "cat_features=['City','Employer_Name','Salary_Account','Source']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Minimum frequency a category needs in order to keep its own label, listed in\n",
    "# the same order as cat_features (the two lists are paired by position).\n",
    "rare_thresholds=[100,30,40,40]\n",
    "for col,rare_threshold in zip(cat_features,rare_thresholds):\n",
    "    # Frequency of every value in the column, NaN included.\n",
    "    value_counts_col=data[col].value_counts(dropna=False)\n",
    "    value_counts_rare=list(value_counts_col[value_counts_col < rare_threshold].index)\n",
    "\n",
    "    # Relabel every row whose value is rarer than the threshold as \"others\".\n",
    "    rare_index=data[col].isin(value_counts_rare)\n",
    "    data.loc[rare_index,col]=\"others\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "ufunc subtract cannot use operands with types dtype('int64') and dtype('<M8[ns]')",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32mD:\\Program Files (x86)\\Microsoft Visual Studio\\Shared\\Anaconda3_64\\lib\\site-packages\\pandas\\core\\ops.py\u001b[0m in \u001b[0;36mna_op\u001b[1;34m(x, y)\u001b[0m\n\u001b[0;32m   1008\u001b[0m         \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1009\u001b[1;33m             \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mexpressions\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mevaluate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mop\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstr_rep\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0meval_kwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1010\u001b[0m         \u001b[1;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\Program Files (x86)\\Microsoft Visual Studio\\Shared\\Anaconda3_64\\lib\\site-packages\\pandas\\core\\computation\\expressions.py\u001b[0m in \u001b[0;36mevaluate\u001b[1;34m(op, op_str, a, b, use_numexpr, **eval_kwargs)\u001b[0m\n\u001b[0;32m    204\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0muse_numexpr\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 205\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0m_evaluate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mop\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mop_str\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mb\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0meval_kwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    206\u001b[0m     \u001b[1;32mreturn\u001b[0m \u001b[0m_evaluate_standard\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mop\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mop_str\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\Program Files (x86)\\Microsoft Visual Studio\\Shared\\Anaconda3_64\\lib\\site-packages\\pandas\\core\\computation\\expressions.py\u001b[0m in \u001b[0;36m_evaluate_numexpr\u001b[1;34m(op, op_str, a, b, truediv, reversed, **eval_kwargs)\u001b[0m\n\u001b[0;32m    119\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 120\u001b[1;33m         \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_evaluate_standard\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mop\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mop_str\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    121\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\Program Files (x86)\\Microsoft Visual Studio\\Shared\\Anaconda3_64\\lib\\site-packages\\pandas\\core\\computation\\expressions.py\u001b[0m in \u001b[0;36m_evaluate_standard\u001b[1;34m(op, op_str, a, b, **eval_kwargs)\u001b[0m\n\u001b[0;32m     64\u001b[0m     \u001b[1;32mwith\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merrstate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mall\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'ignore'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 65\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mb\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     66\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mTypeError\u001b[0m: ufunc subtract cannot use operands with types dtype('int64') and dtype('<M8[ns]')",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-77-f9ae9e33b0b3>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdata\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'Age'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_datetime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'Lead_Creation_Date'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdt\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0myear\u001b[0m\u001b[1;33m-\u001b[0m\u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_datetime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'DOB'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32mD:\\Program Files (x86)\\Microsoft Visual Studio\\Shared\\Anaconda3_64\\lib\\site-packages\\pandas\\core\\ops.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(left, right)\u001b[0m\n\u001b[0;32m   1064\u001b[0m             \u001b[0mrvalues\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mrvalues\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1065\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1066\u001b[1;33m         \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msafe_na_op\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlvalues\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrvalues\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1067\u001b[0m         return construct_result(left, result,\n\u001b[0;32m   1068\u001b[0m                                 index=left.index, name=res_name, dtype=None)\n",
      "\u001b[1;32mD:\\Program Files (x86)\\Microsoft Visual Studio\\Shared\\Anaconda3_64\\lib\\site-packages\\pandas\\core\\ops.py\u001b[0m in \u001b[0;36msafe_na_op\u001b[1;34m(lvalues, rvalues)\u001b[0m\n\u001b[0;32m   1028\u001b[0m         \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1029\u001b[0m             \u001b[1;32mwith\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merrstate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mall\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'ignore'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1030\u001b[1;33m                 \u001b[1;32mreturn\u001b[0m \u001b[0mna_op\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlvalues\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrvalues\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1031\u001b[0m         \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1032\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0mis_object_dtype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlvalues\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\Program Files (x86)\\Microsoft Visual Studio\\Shared\\Anaconda3_64\\lib\\site-packages\\pandas\\core\\ops.py\u001b[0m in \u001b[0;36mna_op\u001b[1;34m(x, y)\u001b[0m\n\u001b[0;32m   1013\u001b[0m                 \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mempty\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msize\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1014\u001b[0m                 \u001b[0mmask\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnotna\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m&\u001b[0m \u001b[0mnotna\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1015\u001b[1;33m                 \u001b[0mresult\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mmask\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mmask\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcom\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_values_from_object\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mmask\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1016\u001b[0m             \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1017\u001b[0m                 \u001b[1;32massert\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mTypeError\u001b[0m: ufunc subtract cannot use operands with types dtype('int64') and dtype('<M8[ns]')"
     ]
    }
   ],
   "source": [
    "# Age = calendar year of Lead_Creation_Date minus calendar year of DOB.\n",
    "# Both operands need .dt.year: subtracting a datetime Series from an integer year raises TypeError.\n",
    "# NOTE(review): DOB values carry two-digit years (e.g. 23-May-78); check that the parser does not\n",
    "# place any birth year in the future, which would give negative ages.\n",
    "data['Age']=pd.to_datetime(data['Lead_Creation_Date']).dt.year-pd.to_datetime(data['DOB']).dt.year"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove the two raw date columns (DOB, Lead_Creation_Date) from data in place.\n",
    "data.drop(columns=['DOB','Lead_Creation_Date'],inplace=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Loan_Tenure_Applied 贷款期限"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Treat Loan_Tenure_Applied values 6-10 as missing by replacing them with NaN.\n",
    "# One replace call handles every listed value, including 6; the result is assigned back\n",
    "# to the column instead of relying on an in-place replace of a column selection.\n",
    "# NOTE(review): presumably these values are regarded as invalid/outlier tenures -- confirm.\n",
    "data['Loan_Tenure_Applied']=data['Loan_Tenure_Applied'].replace([6,7,8,9,10],np.nan)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "LoggedIn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove the LoggedIn column from data in place.\n",
    "data.drop(columns='LoggedIn',inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "### Encode categorical features as integers first; LightGBM does not need one-hot encoding"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import LabelEncoder\n",
    "\n",
    "# Columns whose values are replaced by the integer codes produced by LabelEncoder.\n",
    "# NOTE(review): the list includes Disbursed and numeric-looking columns (Existing_EMI,\n",
    "# EMI_Loan_Submitted, Interest_Rate) -- confirm that encoding them is intended.\n",
    "feats_to_encode=[\n",
    "    'City','Employer_Name','Salary_Account','Device_Type','Filled_Form','Gender','id',\n",
    "    'Disbursed','Existing_EMI','EMI_Loan_Submitted','Interest_Rate',\n",
    "]\n",
    "\n",
    "# A single encoder object is reused; fit_transform refits it on each column in turn.\n",
    "le=LabelEncoder()\n",
    "for col in feats_to_encode:\n",
    "    data[col]=le.fit_transform(data[col])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>City</th>\n",
       "      <th>DOB</th>\n",
       "      <th>Device_Type</th>\n",
       "      <th>Disbursed</th>\n",
       "      <th>EMI_Loan_Submitted</th>\n",
       "      <th>Employer_Name</th>\n",
       "      <th>Existing_EMI</th>\n",
       "      <th>Filled_Form</th>\n",
       "      <th>Gender</th>\n",
       "      <th>Interest_Rate</th>\n",
       "      <th>...</th>\n",
       "      <th>Monthly_Income</th>\n",
       "      <th>Processing_Fee</th>\n",
       "      <th>Salary_Account</th>\n",
       "      <th>Source</th>\n",
       "      <th>Var1</th>\n",
       "      <th>Var2</th>\n",
       "      <th>Var4</th>\n",
       "      <th>Var5</th>\n",
       "      <th>id</th>\n",
       "      <th>source</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Delhi</td>\n",
       "      <td>23-May-78</td>\n",
       "      <td>Web-browser</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>others</td>\n",
       "      <td>0.0</td>\n",
       "      <td>N</td>\n",
       "      <td>Female</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>20000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>HDFC Bank</td>\n",
       "      <td>S122</td>\n",
       "      <td>HBXX</td>\n",
       "      <td>G</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>ID000002C20</td>\n",
       "      <td>train</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Mumbai</td>\n",
       "      <td>7-Oct-85</td>\n",
       "      <td>Web-browser</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6762.9</td>\n",
       "      <td>TATA CONSULTANCY SERVICES LTD (TCS)</td>\n",
       "      <td>0.0</td>\n",
       "      <td>N</td>\n",
       "      <td>Male</td>\n",
       "      <td>13.25</td>\n",
       "      <td>...</td>\n",
       "      <td>35000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>ICICI Bank</td>\n",
       "      <td>S122</td>\n",
       "      <td>HBXA</td>\n",
       "      <td>G</td>\n",
       "      <td>3</td>\n",
       "      <td>13</td>\n",
       "      <td>ID000004E40</td>\n",
       "      <td>train</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>others</td>\n",
       "      <td>10-Oct-81</td>\n",
       "      <td>Web-browser</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>others</td>\n",
       "      <td>0.0</td>\n",
       "      <td>N</td>\n",
       "      <td>Male</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>22500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>State Bank of India</td>\n",
       "      <td>S143</td>\n",
       "      <td>HBXX</td>\n",
       "      <td>B</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>ID000007H20</td>\n",
       "      <td>train</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>others</td>\n",
       "      <td>30-Nov-87</td>\n",
       "      <td>Web-browser</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>others</td>\n",
       "      <td>0.0</td>\n",
       "      <td>N</td>\n",
       "      <td>Male</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>35000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>State Bank of India</td>\n",
       "      <td>S143</td>\n",
       "      <td>HBXX</td>\n",
       "      <td>B</td>\n",
       "      <td>3</td>\n",
       "      <td>10</td>\n",
       "      <td>ID000008I30</td>\n",
       "      <td>train</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Bengaluru</td>\n",
       "      <td>17-Feb-84</td>\n",
       "      <td>Web-browser</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>others</td>\n",
       "      <td>25000.0</td>\n",
       "      <td>N</td>\n",
       "      <td>Male</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>100000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>HDFC Bank</td>\n",
       "      <td>S134</td>\n",
       "      <td>HBXX</td>\n",
       "      <td>B</td>\n",
       "      <td>3</td>\n",
       "      <td>17</td>\n",
       "      <td>ID000009J40</td>\n",
       "      <td>train</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 26 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        City        DOB  Device_Type  Disbursed EMI_Loan_Submitted  \\\n",
       "0      Delhi  23-May-78  Web-browser        0.0                NaN   \n",
       "1     Mumbai   7-Oct-85  Web-browser        0.0             6762.9   \n",
       "2     others  10-Oct-81  Web-browser        0.0                NaN   \n",
       "3     others  30-Nov-87  Web-browser        0.0                NaN   \n",
       "4  Bengaluru  17-Feb-84  Web-browser        0.0                NaN   \n",
       "\n",
       "                         Employer_Name  Existing_EMI Filled_Form  Gender  \\\n",
       "0                               others           0.0           N  Female   \n",
       "1  TATA CONSULTANCY SERVICES LTD (TCS)           0.0           N    Male   \n",
       "2                               others           0.0           N    Male   \n",
       "3                               others           0.0           N    Male   \n",
       "4                               others       25000.0           N    Male   \n",
       "\n",
       "   Interest_Rate  ...   Monthly_Income  Processing_Fee       Salary_Account  \\\n",
       "0            NaN  ...            20000             NaN            HDFC Bank   \n",
       "1          13.25  ...            35000             NaN           ICICI Bank   \n",
       "2            NaN  ...            22500             NaN  State Bank of India   \n",
       "3            NaN  ...            35000             NaN  State Bank of India   \n",
       "4            NaN  ...           100000             NaN            HDFC Bank   \n",
       "\n",
       "   Source  Var1 Var2  Var4  Var5           id source  \n",
       "0    S122  HBXX    G     1     0  ID000002C20  train  \n",
       "1    S122  HBXA    G     3    13  ID000004E40  train  \n",
       "2    S143  HBXX    B     1     0  ID000007H20  train  \n",
       "3    S143  HBXX    B     3    10  ID000008I30  train  \n",
       "4    S134  HBXX    B     3    17  ID000009J40  train  \n",
       "\n",
       "[5 rows x 26 columns]"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of data.\n",
    "data.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split data into train and test rows according to the value of the 'source' column.\n",
    "# .copy() makes each part an independent DataFrame, so modifying train/test afterwards\n",
    "# does not raise SettingWithCopyWarning.\n",
    "train=data.loc[data['source']=='train'].copy()\n",
    "test=data.loc[data['source']=='test'].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Program Files (x86)\\Microsoft Visual Studio\\Shared\\Anaconda3_64\\lib\\site-packages\\pandas\\core\\frame.py:3694: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  errors=errors)\n"
     ]
    }
   ],
   "source": [
    "# Remove the 'source' marker column from both parts, and 'Disbursed' from test as well.\n",
    "# drop() without inplace returns a new frame, which avoids the SettingWithCopyWarning that an\n",
    "# in-place drop raises on a frame sliced out of data.\n",
    "train=train.drop(columns='source')\n",
    "test=test.drop(columns=['source','Disbursed'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write both frames to CSV without the index column.\n",
    "# NOTE(review): the destinations are absolute paths on one specific Windows machine.\n",
    "train.to_csv(path_or_buf=r'C:\\Users\\Administrator\\Desktop\\FE_train.csv',index=False)\n",
    "test.to_csv(path_or_buf=r'C:\\Users\\Administrator\\Desktop\\FE_test.csv',index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "ename": "ImportError",
     "evalue": "cannot import name '_IS_32BIT'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mImportError\u001b[0m                               Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-83-e36c227c4271>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mlightgbm\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mlgbm\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      4\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mlightgbm\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msklearn\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mLGBMClassifier\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 5\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmodel_selection\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mGridSearchCV\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      6\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpyploy\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mplt\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      7\u001b[0m \u001b[0mget_ipython\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrun_line_magic\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'matplotlib'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'inline'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\Program Files (x86)\\Microsoft Visual Studio\\Shared\\Anaconda3_64\\lib\\site-packages\\sklearn\\__init__.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m     62\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     63\u001b[0m     \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0m__check_build\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 64\u001b[1;33m     \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m\u001b[0mbase\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mclone\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     65\u001b[0m     \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m\u001b[0mutils\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_show_versions\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mshow_versions\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     66\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\Program Files (x86)\\Microsoft Visual Studio\\Shared\\Anaconda3_64\\lib\\site-packages\\sklearn\\base.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m     13\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m\u001b[0mexternals\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0msix\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     14\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m\u001b[0mutils\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfixes\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0msignature\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 15\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m\u001b[0mutils\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0m_IS_32BIT\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     16\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[1;33m.\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0m__version__\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     17\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mImportError\u001b[0m: cannot import name '_IS_32BIT'"
     ]
    }
   ],
   "source": [
    "# Imports for the LightGBM modelling stage.\n",
    "# numpy is aliased np (the alias used by the notebook's first import cell) and the plotting\n",
    "# module is matplotlib.pyplot.\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import lightgbm as lgbm\n",
    "from lightgbm.sklearn import LGBMClassifier\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
